{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE ParallelListComp #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE NondecreasingIndentation #-}

{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

-- Generating machine code (instruction selection)
-- (c) The University of Glasgow 1996-2004

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module GHC.CmmToAsm.X86.CodeGen (


-- NCG stuff:
import GHC.Prelude

import GHC.CmmToAsm.X86.Instr
import GHC.CmmToAsm.X86.Cond
import GHC.CmmToAsm.X86.Regs
import GHC.CmmToAsm.X86.Ppr
import GHC.CmmToAsm.X86.RegInfo

import GHC.Platform.Regs
import GHC.CmmToAsm.CPrim
import GHC.CmmToAsm.Types
import GHC.Cmm.DebugBlock
   ( DebugBlock(..), UnwindPoint(..), UnwindTable
   , UnwindExpr(UwReg), toUnwindExpr
import GHC.CmmToAsm.PIC
import GHC.CmmToAsm.Monad
   ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat
   , getDeltaNat, getBlockIdNat, getPicBaseNat
   , Reg64(..), RegCode64(..), getNewReg64, localReg64
   , getPicBaseMaybeNat, getDebugBlock, getFileId
   , addImmediateSuccessorNat, updateCfgNat, getConfig, getPlatform
   , getCfgWeights
import GHC.CmmToAsm.CFG
import GHC.CmmToAsm.Format
import GHC.CmmToAsm.Config
import GHC.Platform.Reg
import GHC.Platform

-- Our intermediate code:
import GHC.Types.Basic
import GHC.Cmm.BlockId
import GHC.Unit.Types ( primUnitId )
import GHC.Cmm.Utils
import GHC.Cmm.Switch
import GHC.Cmm
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Graph
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.CLabel
import GHC.Types.Tickish ( GenTickish(..) )
import GHC.Types.SrcLoc  ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import GHC.Data.Maybe ( expectJust )
import GHC.Types.ForeignCall ( CCallConv(..) )
import GHC.Data.OrdList
import GHC.Utils.Outputable
import GHC.Utils.Constants (debugIsOn)
import GHC.Utils.Monad ( foldMapM )
import GHC.Utils.Panic
import GHC.Data.FastString
import GHC.Utils.Misc
import GHC.Types.Unique.DSM ( getUniqueM )

import qualified Data.Semigroup as S

import Control.Monad
import Control.Monad.Trans.State.Strict
  ( StateT, evalStateT, get, put )
import Control.Monad.Trans.Class (lift)
import Data.Foldable (fold)
import Data.Int
import Data.Maybe
import Data.Word

import qualified Data.Map as Map

-- | Is the platform we are generating code for a 32-bit target?
is32BitPlatform :: NatM Bool
is32BitPlatform = target32Bit <$> getPlatform

-- | Does the NCG configuration select an SSE version of at least 'SSE4'?
sse4_1Enabled :: NatM Bool
sse4_1Enabled = do
  config <- getConfig
  return (ncgSseVersion config >= Just SSE4)

-- | Does the NCG configuration select an SSE version of at least 'SSE42'?
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  config <- getConfig
  return (ncgSseVersion config >= Just SSE42)

-- | Is AVX enabled in the NCG configuration?
avxEnabled :: NatM Bool
avxEnabled = do
  config <- getConfig
  return (ncgAvxEnabled config)

        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]

cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]
cmmTopCodeGen (CmmProc LabelMap RawCmmStatics
info CLabel
lab [GlobalRegUse]
live CmmGraph
graph) = do
  let blocks :: [CmmBlock]
blocks = CmmGraph -> [CmmBlock]
toBlockListEntryFirst CmmGraph
  (nat_blocks,statics) <- (CmmBlock
 -> NatM
      ([NatBasicBlock Instr],
       [NatCmmDecl (Alignment, RawCmmStatics) Instr]))
-> [CmmBlock]
-> NatM
     ([[NatBasicBlock Instr]],
      [[NatCmmDecl (Alignment, RawCmmStatics) Instr]])
forall (m :: * -> *) a b c.
Applicative m =>
(a -> m (b, c)) -> [a] -> m ([b], [c])
mapAndUnzipM CmmBlock
-> NatM
     ([NatBasicBlock Instr],
      [NatCmmDecl (Alignment, RawCmmStatics) Instr])
basicBlockCodeGen [CmmBlock]
  picBaseMb <- getPicBaseMaybeNat
  platform <- getPlatform
  let proc = LabelMap RawCmmStatics
-> CLabel
-> [GlobalRegUse]
-> ListGraph Instr
-> NatCmmDecl (Alignment, RawCmmStatics) Instr
forall d h g.
h -> CLabel -> [GlobalRegUse] -> g -> GenCmmDecl d h g
CmmProc LabelMap RawCmmStatics
info CLabel
lab [GlobalRegUse]
live ([NatBasicBlock Instr] -> ListGraph Instr
forall i. [GenBasicBlock i] -> ListGraph i
ListGraph ([NatBasicBlock Instr] -> ListGraph Instr)
-> [NatBasicBlock Instr] -> ListGraph Instr
forall a b. (a -> b) -> a -> b
$ [[NatBasicBlock Instr]] -> [NatBasicBlock Instr]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[NatBasicBlock Instr]]
      tops = NatCmmDecl (Alignment, RawCmmStatics) Instr
proc NatCmmDecl (Alignment, RawCmmStatics) Instr
-> [NatCmmDecl (Alignment, RawCmmStatics) Instr]
-> [NatCmmDecl (Alignment, RawCmmStatics) Instr]
forall a. a -> [a] -> [a]
: [[NatCmmDecl (Alignment, RawCmmStatics) Instr]]
-> [NatCmmDecl (Alignment, RawCmmStatics) Instr]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[NatCmmDecl (Alignment, RawCmmStatics) Instr]]
      os   = Platform -> OS
platformOS Platform

  case picBaseMb of
      Just Reg
picBase -> OS
-> Reg
-> [NatCmmDecl (Alignment, RawCmmStatics) Instr]
-> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]
initializePicBase_x86 OS
os Reg
picBase [NatCmmDecl (Alignment, RawCmmStatics) Instr]
      Maybe Reg
Nothing -> [NatCmmDecl (Alignment, RawCmmStatics) Instr]
-> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return [NatCmmDecl (Alignment, RawCmmStatics) Instr]

cmmTopCodeGen (CmmData Section
sec RawCmmStatics
dat) =
  [NatCmmDecl (Alignment, RawCmmStatics) Instr]
-> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return [Section
-> (Alignment, RawCmmStatics)
-> NatCmmDecl (Alignment, RawCmmStatics) Instr
forall d h g. Section -> d -> GenCmmDecl d h g
CmmData Section
sec (Int -> Alignment
mkAlignment Int
1, RawCmmStatics
dat)]  -- no translation, we just use CmmStatic

{- Note [Verifying basic blocks]
   We want to guarantee a few things about the results
   of instruction selection.

   Namely that each basic block consists of:
    * A (potentially empty) sequence of straight line instructions
  followed by
    * A (potentially empty) sequence of jump like instructions.

    We can verify this by going through the instructions and
    making sure that any non-jumpish instruction can't appear
    after a jumpish instruction.

    There are gotchas however:
    * CALLs are strictly speaking control flow but here we care
      not about them. Hence we treat them as regular instructions.

      It's safe for them to appear inside a basic block
      as (ignoring side effects inside the call) they will result in
      straight line code.

    * NEWBLOCK marks the start of a new basic block so can
      be followed by any instructions.
-}

-- Verifying basic blocks is cheap, but not cheap enough to enable it unconditionally.
-- | Check the invariant described in Note [Verifying basic blocks].
--
-- Only does any work when 'debugIsOn'; otherwise it is just @()@.
-- Panics (via 'pprPanic') with the offending instruction and the whole
-- instruction list if a non-jump instruction follows a jump.
verifyBasicBlock :: Platform -> [Instr] -> ()
verifyBasicBlock platform instrs
  | debugIsOn     = go False instrs
  | otherwise     = ()
  where
    -- The Bool records whether a jumpish instruction has already been
    -- seen in the current basic block.
    go :: Bool -> [Instr] -> ()
    go _     [] = ()
    go atEnd (i:instr)
        = case i of
            -- Start a new basic block
            NEWBLOCK {} -> go False instr
            -- Calls are not viable block terminators
            CALL {}     | atEnd -> faultyBlockWith i
                        | not atEnd -> go atEnd instr
            -- All instructions ok, check if we reached the end and continue.
            _ | not atEnd -> go (isJumpishInstr i) instr
              -- Only jumps allowed at the end of basic blocks.
              | otherwise -> if isJumpishInstr i
                                then go True instr
                                else faultyBlockWith i

    faultyBlockWith :: Instr -> ()
    faultyBlockWith i
        = pprPanic "Non control flow instructions after end of basic block."
                   (pprInstr platform i <+> text "in:" $$ vcat (map (pprInstr platform) instrs))

        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, RawCmmStatics) Instr])

basicBlockCodeGen :: CmmBlock
-> NatM
     ([NatBasicBlock Instr],
      [NatCmmDecl (Alignment, RawCmmStatics) Instr])
basicBlockCodeGen CmmBlock
block = do
  let (CmmNode C O
_, Block CmmNode O O
nodes, CmmNode O C
tail)  = CmmBlock -> (CmmNode C O, Block CmmNode O O, CmmNode O C)
forall (n :: Extensibility -> Extensibility -> *).
Block n C C -> (n C O, Block n O O, n O C)
blockSplit CmmBlock
      id :: Label
id = CmmBlock -> Label
forall (x :: Extensibility). Block CmmNode C x -> Label
forall (thing :: Extensibility -> Extensibility -> *)
       (x :: Extensibility).
NonLocal thing =>
thing C x -> Label
entryLabel CmmBlock
      stmts :: [CmmNode O O]
stmts = Block CmmNode O O -> [CmmNode O O]
forall (n :: Extensibility -> Extensibility -> *).
Block n O O -> [n O O]
blockToList Block CmmNode O O
  -- Generate location directive
  dbg <- Label -> NatM (Maybe DebugBlock)
getDebugBlock (CmmBlock -> Label
forall (x :: Extensibility). Block CmmNode C x -> Label
forall (thing :: Extensibility -> Extensibility -> *)
       (x :: Extensibility).
NonLocal thing =>
thing C x -> Label
entryLabel CmmBlock
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote RealSrcSpan
span (LexicalFastString FastString
      -> do fileId <- FastString -> NatM Int
getFileId (RealSrcSpan -> FastString
srcSpanFile RealSrcSpan
            let line = RealSrcSpan -> Int
srcSpanStartLine RealSrcSpan
span; col = RealSrcSpan -> Int
srcSpanStartCol RealSrcSpan
            return $ unitOL $ LOCATION fileId line col (unpackFS name)
    Maybe CmmTickish
_ -> InstrBlock -> NatM InstrBlock
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return InstrBlock
forall a. OrdList a
  (mid_instrs,mid_bid) <- stmtsToInstrs id stmts
  (!tail_instrs,_) <- stmtToInstrs mid_bid tail
  let instrs = InstrBlock
loc_instrs InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
`appOL` InstrBlock
mid_instrs InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
`appOL` InstrBlock
  platform <- getPlatform
  return $! verifyBasicBlock platform (fromOL instrs)
  instrs' <- fold <$> traverse addSpUnwindings instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'

        mkBlocks (NEWBLOCK Label
id) ([Instr]
instrs,[NatBasicBlock Instr]
blocks,[GenCmmDecl (Alignment, RawCmmStatics) h g]
          = ([], Label -> [Instr] -> NatBasicBlock Instr
forall i. Label -> [i] -> GenBasicBlock i
BasicBlock Label
id [Instr]
instrs NatBasicBlock Instr
-> [NatBasicBlock Instr] -> [NatBasicBlock Instr]
forall a. a -> [a] -> [a]
: [NatBasicBlock Instr]
blocks, [GenCmmDecl (Alignment, RawCmmStatics) h g]
        mkBlocks (LDATA Section
sec (Alignment, RawCmmStatics)
dat) ([Instr]
instrs,[NatBasicBlock Instr]
blocks,[GenCmmDecl (Alignment, RawCmmStatics) h g]
          = ([Instr]
instrs, [NatBasicBlock Instr]
blocks, Section
-> (Alignment, RawCmmStatics)
-> GenCmmDecl (Alignment, RawCmmStatics) h g
forall d h g. Section -> d -> GenCmmDecl d h g
CmmData Section
sec (Alignment, RawCmmStatics)
datGenCmmDecl (Alignment, RawCmmStatics) h g
-> [GenCmmDecl (Alignment, RawCmmStatics) h g]
-> [GenCmmDecl (Alignment, RawCmmStatics) h g]
forall a. a -> [a] -> [a]
:[GenCmmDecl (Alignment, RawCmmStatics) h g]
        mkBlocks Instr
instr ([Instr]
instrs,[NatBasicBlock Instr]
blocks,[GenCmmDecl (Alignment, RawCmmStatics) h g]
          = (Instr
instrInstr -> [Instr] -> [Instr]
forall a. a -> [a] -> [a]
instrs, [NatBasicBlock Instr]
blocks, [GenCmmDecl (Alignment, RawCmmStatics) h g]
  return (BasicBlock id top : other_blocks, statics)

-- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
-- in the @sp@ register. See Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock"
-- for details.
--
-- When DWARF unwindings are enabled in the configuration, a @DELTA d@ is
-- followed by an 'UNWIND' (under a fresh temporary label) recording
-- 'MachSp' as @MachSp - d@.  Every other instruction, and every 'DELTA'
-- when unwindings are disabled, is returned unchanged as a singleton.
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    config <- getConfig
    let platform = ncgPlatform config
    if ncgDwarfUnwindings config
        then do lbl <- mkAsmTempLabel <$> getUniqueM
                let unwind = Map.singleton MachSp (Just $ UwReg (GlobalRegUse MachSp (bWord platform)) $ negate d)
                return $ toOL [ instr, UNWIND lbl unwind ]
        else return (unitOL instr)
addSpUnwindings instr = return $ unitOL instr

{- Note [Keeping track of the current block]
When generating instructions for Cmm we sometimes require
the current block for things like retry loops.

We also sometimes change the current block, if a MachOP
results in branching control flow.

Issues arise if we have two statements in the same block,
which both depend on the current block id *and* change the
basic block after them. This happens for atomic primops
in the X86 backend where we want to update the CFG data structure
when introducing new basic blocks.

For example in #17334 we got this Cmm code:

        c3Bf: // global
            (_s3t1::I64) = call MO_AtomicRMW W64 AMO_And(_s3sQ::P64 + 88, 18);
            (_s3t4::I64) = call MO_AtomicRMW W64 AMO_Or(_s3sQ::P64 + 88, 0);
            _s3sT::I64 = _s3sV::I64;
            goto c3B1;

This resulted in two new basic blocks being inserted:

                movl $18,%vI_n3Bo
                movq 88(%vI_s3sQ),%rax
                jmp _n3Bp
                cmpxchgq %vI_n3Bq,88(%vI_s3sQ)
                jne _n3Bp
                jmp _n3Bs
                cmpxchgq %vI_n3Bt,88(%vI_s3sQ)
                jne _n3Bs
                jmp _c3B1

Based on the Cmm we called stmtToInstrs we translated both atomic operations under
the assumption they would be placed into their Cmm basic block `c3Bf`.
However for the retry loop we introduce new labels, so this is not the case
for the second statement.
This resulted in a desync between the explicit control flow graph
we construct as a separate data type and the actual control flow graph in the code.

Instead we now return the new basic block if a statement causes a change
in the current block and use the block for all following statements.

For this reason genForeignCall is also split into two parts.  One for calls which
*won't* change the basic blocks in which successive instructions will be
placed (since they only evaluate CmmExpr, which can only contain MachOps, which
cannot introduce basic blocks in their lowerings).  A different one for calls
which *are* known to change the basic block.

-}

-- See Note [Keeping track of the current block] for why
-- we pass the BlockId.
-- | Translate a sequence of Cmm statements, threading the current block id
-- through: whenever a statement reports that it ended in a new block, that
-- block becomes the current one for all following statements.
stmtsToInstrs :: BlockId -- ^ Basic block these statements will start to be placed in.
              -> [CmmNode O O] -- ^ Cmm Statement
              -> NatM (InstrBlock, BlockId) -- ^ Resulting instruction
stmtsToInstrs bid stmts =
    go bid stmts nilOL
  where
    go :: BlockId -> [CmmNode e x] -> InstrBlock -> NatM (InstrBlock, BlockId)
    go bid  []        instrs = return (instrs,bid)
    go bid (s:stmts)  instrs = do
      (instrs',bid') <- stmtToInstrs bid s
      -- If the statement introduced a new block, we use that one
      let !newBid = fromMaybe bid bid'
      go newBid stmts (instrs `appOL` instrs')

-- | `bid` refers to the current block and is used to update the CFG
--   if new blocks are inserted in the control flow.
-- See Note [Keeping track of the current block] for more details.
--
-- Only 'CmmUnsafeForeignCall' can report a new current block here; every
-- other statement is paired with 'Nothing'.
stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in.
             -> CmmNode e x
             -> NatM (InstrBlock, Maybe BlockId)
             -- ^ Instructions, and bid of new block if successive
             -- statements are placed in a different basic block.
stmtToInstrs bid stmt = do
  is32Bit <- is32BitPlatform
  platform <- getPlatform
  case stmt of
    CmmUnsafeForeignCall target result_regs args
       -> genForeignCall target result_regs args bid

    _ -> (,Nothing) <$> case stmt of
      CmmComment s   -> return (unitOL (COMMENT s))
      CmmTick {}     -> return nilOL

      CmmUnwind regs -> do
        let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
            to_unwind_entry (reg, expr) = Map.singleton reg (fmap (toUnwindExpr platform) expr)
        case foldMap to_unwind_entry regs of
          -- An empty table needs no UNWIND instruction at all.
          tbl | Map.null tbl -> return nilOL
              | otherwise    -> do
                  lbl <- mkAsmTempLabel <$> getUniqueM
                  return $ unitOL $ UNWIND lbl tbl

      -- Register assignment: dispatch on the type of the destination.
      CmmAssign reg src
        | isFloatType ty         -> assignReg_FltCode reg src
        | is32Bit && isWord64 ty -> assignReg_I64Code reg src
        | isVecType ty           -> assignReg_VecCode reg src
        | otherwise              -> assignReg_IntCode reg src
          where ty = cmmRegType reg

      -- Memory store: dispatch on the type of the stored expression.
      CmmStore addr src _alignment
        | isFloatType ty         -> assignMem_FltCode format addr src
        | is32Bit && isWord64 ty -> assignMem_I64Code        addr src
        | isVecType ty           -> assignMem_VecCode format addr src
        | otherwise              -> assignMem_IntCode format addr src
          where ty = cmmExprType platform src
                format = cmmTypeFormat ty

      CmmBranch id          -> return $ genBranch id

      --We try to arrange blocks such that the likely branch is the fallthrough
      --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
      CmmCondBranch arg true false _ -> genCondBranch bid true false arg
      CmmSwitch arg ids -> genSwitch arg ids
      CmmCall { cml_target = arg
              , cml_args_regs = gregs } -> genJump arg (jumpRegs platform gregs)
      _ ->
        panic "stmtToInstrs: statement should have been cps'd away"

-- | The real machine registers (with their formats) corresponding to the
-- given global register uses.  Global registers for which
-- 'globalRegMaybe' yields 'Nothing' on this platform are dropped.
jumpRegs :: Platform -> [GlobalRegUse] -> [RegWithFormat]
jumpRegs platform gregs =
  [ RegWithFormat (RegReal r) (cmmTypeFormat ty)
  | GlobalRegUse gr ty <- gregs
  , Just r <- [globalRegMaybe platform gr] ]

-- | 'InstrBlock's are the instruction sequences generated by the
--      instruction selectors.  They are really trees of instructions (an
--      'OrdList') to facilitate fast appending, where a left-to-right
--      traversal yields the instructions in the correct order.
type InstrBlock
        = OrdList Instr

-- | Condition codes passed up the tree: the condition to test together
--      with the instructions that must run first to establish it.
--      NOTE(review): the 'Bool' field presumably flags a floating-point
--      comparison -- confirm against the producers of 'CondCode'.
data CondCode
        = CondCode Bool Cond InstrBlock

-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
--      @Fixed@ carries the format, the register holding the result and the
--      code that computes it.  @Any@ carries the format and a function
--      that, given the destination register chosen by the parent, yields
--      the code that computes the result into it.
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)

-- | Replace the 'Format' recorded in a 'Register', leaving the register
-- (or code-generating function) and its code untouched.
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn

-- | The virtual register backing a Cmm local register, built from the
-- local's unique and the format of its Cmm type.
getLocalRegReg :: LocalReg -> Reg
getLocalRegReg (LocalReg u ty)
  = -- by assuming SSE2, Int, Word, Float, Double and vectors all can be register allocated
    RegVirtual (mkVirtualReg u (cmmTypeFormat ty))

-- | Grab the Reg for a CmmReg
--
-- Local registers map to virtual registers via 'getLocalRegReg'.  Global
-- registers must map to a real machine register on this platform;
-- otherwise this panics with @getRegisterReg-memory@.
getRegisterReg :: Platform  -> CmmReg -> Reg

getRegisterReg _   (CmmLocal lreg) = getLocalRegReg lreg

getRegisterReg platform  (CmmGlobal mid)
  = case globalRegMaybe platform $ globalRegUse_reg mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...

-- | Memory addressing modes passed up the tree: the addressing mode itself
--      together with the code that must run before the address is used.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}

{-
Note [%rip-relative addressing on x86-64]
On x86-64 GHC produces code for use in the "small" or, when `-fPIC` is set,
"small PIC" code models defined by the x86-64 System V ABI (section 3.5.1 of
specification version 0.99).

In general the small code model would allow us to assume that code is located
between 0 and 2^31 - 1. However, this is not true on Windows which, due to
high-entropy ASLR, may place the executable image anywhere in 64-bit address
space. This is problematic since immediate operands in x86-64 are generally
32-bit sign-extended values (with the exception of the 64-bit MOVABS encoding).
Consequently, to avoid overflowing we use %rip-relative addressing universally.
Since %rip-relative addressing comes essentially for free and makes linking far
easier, we use it even on non-Windows platforms.

See also: the documentation for GCC's `-mcmodel=small` flag.
-}

-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
--      The result is 'True' exactly when the truncated value lies in the
--      signed 32-bit range @[-0x80000000, 0x7fffffff]@.
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64

-- | Convert a BlockId to some CmmStatic data
--
-- A missing target becomes a zero literal of the configured word width;
-- a present target becomes a reference to the block's label.
jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
jumpTableEntry config Nothing = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = blockLbl blockid

-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
-- | Rewrite "register plus constant offset" as an explicit 'MO_Add' of the
-- register and an integer literal, both at the width of the register's type.
mangleIndexTree :: CmmReg -> Int -> CmmExpr
mangleIndexTree reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType reg)

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
--
--      For an @Any@ result a fresh register of the reported format is
--      allocated and the code is instantiated for it; a @Fixed@ result is
--      returned as is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)

-- | Store a 64-bit value to memory as two 32-bit moves (used on 32-bit
-- targets).  The value is computed first, then the address, then the low
-- word is stored at the address and the high word 4 bytes above it.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  RegCode64 vcode rhi rlo <- iselExpr64 valueTree
  let
        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)

-- | Assign a 64-bit value to a local register on a 32-bit target: the
-- value is computed into a register pair, which is then copied word by
-- word into the pair backing the destination.  Any destination other than
-- a local register is a panic.
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal dst) valueTree = do
   RegCode64 vcode r_src_hi r_src_lo <- iselExpr64 valueTree
   let
         Reg64 r_dst_hi r_dst_lo = localReg64 dst
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"

-- | Instruction selection for 64-bit expressions using pairs of 32-bit
-- registers.  The result carries the generated code plus the registers
-- holding the high and the low word (in that order).
iselExpr64 :: HasDebugCallStack => CmmExpr -> NatM (RegCode64 InstrBlock)
-- Integer literal: load each 32-bit half as an immediate.
iselExpr64 (CmmLit (CmmInt i _)) = do
  Reg64 rhi rlo <- getNewReg64
  let
        -- r: low 32 bits of the literal, q: high 32 bits.  Going through
        -- Word32 truncates and keeps both halves non-negative.
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (RegCode64 code rhi rlo)

-- 64-bit load: read the low word from the address and the high word from
-- the address plus 4 (little-endian layout).
iselExpr64 (CmmLoad addrTree ty _) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   Reg64 rhi rlo <- getNewReg64
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        -- 'fromJust' assumes 'addrOffset' can always represent @addr + 4@.
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            RegCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rhi rlo
     )

-- Local register: no code is needed, just name the register pair that
-- 'localReg64' associates with it.
iselExpr64 (CmmReg (CmmLocal local_reg)) = do
  let Reg64 hi lo = localReg64 local_reg
  return (RegCode64 nilOL hi lo)

-- Addition of a literal: add the low halves with ADD, then the high halves
-- with ADC so that the carry out of the low word is taken into account.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   Reg64 rhi rlo <- getNewReg64
   let
        -- r: low 32 bits of the literal, q: high 32 bits.
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (RegCode64 code rhi rlo)

-- General addition: ADD on the low words, ADC on the high words.  The
-- operands are first copied into fresh result registers so that the
-- input registers are left untouched.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   RegCode64 code2 r2hi r2lo <- iselExpr64 e2
   Reg64 rhi rlo <- getNewReg64
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (RegCode64 code rhi rlo)

-- Subtraction: SUB on the low words, SBB on the high words so that the
-- borrow out of the low word is taken into account.
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   RegCode64 code2 r2hi r2lo <- iselExpr64 e2
   Reg64 rhi rlo <- getNewReg64
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (RegCode64 code rhi rlo)

-- Zero-extending conversions to 64 bits: the low word receives the
-- (zero-extended) source and the high word is cleared with XOR.

-- 32 -> 64: compute the source directly into the low result register.
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
     code <- getAnyReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code r_dst_lo `snocOL`
                          XOR II32 (OpReg r_dst_hi) (OpReg r_dst_hi))
                          r_dst_hi
                          r_dst_lo

-- 16 -> 64: zero-extend the 16-bit source into the low word.
iselExpr64 (CmmMachOp (MO_UU_Conv W16 W64) [expr]) = do
     (rsrc, code) <- getByteReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code `appOL` toOL [
                          MOVZxL II16 (OpReg rsrc) (OpReg r_dst_lo),
                          XOR    II32 (OpReg r_dst_hi) (OpReg r_dst_hi)
                          ])
                          r_dst_hi
                          r_dst_lo

-- 8 -> 64: zero-extend the 8-bit source into the low word.
iselExpr64 (CmmMachOp (MO_UU_Conv W8 W64) [expr]) = do
     (rsrc, code) <- getByteReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code `appOL` toOL [
                          MOVZxL II8 (OpReg rsrc) (OpReg r_dst_lo),
                          XOR    II32 (OpReg r_dst_hi) (OpReg r_dst_hi)
                          ])
                          r_dst_hi
                          r_dst_lo

-- Sign-extending conversions to 64 bits.  The 32-bit value is placed in
-- %eax and CLTD is issued; afterwards the low word is read back from %eax
-- and the high word (the sign extension) from %edx.

-- 32 -> 64
iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
     code <- getAnyReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code r_dst_lo `snocOL`
                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                          CLTD II32 `snocOL`
                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
                          r_dst_hi
                          r_dst_lo

-- 16 -> 64: sign-extend to 32 bits in %eax first.
iselExpr64 (CmmMachOp (MO_SS_Conv W16 W64) [expr]) = do
     (r, code) <- getByteReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code `appOL` toOL [
                          MOVSxL II16 (OpReg r) (OpReg eax),
                          CLTD II32,
                          MOV II32 (OpReg eax) (OpReg r_dst_lo),
                          MOV II32 (OpReg edx) (OpReg r_dst_hi)])
                          r_dst_hi
                          r_dst_lo

-- 8 -> 64: sign-extend to 32 bits in %eax first.
iselExpr64 (CmmMachOp (MO_SS_Conv W8 W64) [expr]) = do
     (r, code) <- getByteReg expr
     Reg64 r_dst_hi r_dst_lo <- getNewReg64
     return $ RegCode64 (code `appOL` toOL [
                          MOVSxL II8 (OpReg r) (OpReg eax),
                          CLTD II32,
                          MOV II32 (OpReg eax) (OpReg r_dst_lo),
                          MOV II32 (OpReg edx) (OpReg r_dst_hi)])
                          r_dst_hi
                          r_dst_lo

-- Negation, computed as @0 - x@ across both words: negate the low word,
-- then subtract the high word (with the borrow from the negation) from a
-- zeroed high result register.
iselExpr64 (CmmMachOp (MO_S_Neg _) [expr]) = do
   RegCode64 code rhi rlo <- iselExpr64 expr
   Reg64 rohi rolo <- getNewReg64
   let
        ocode = code `appOL`
                toOL [ MOV II32 (OpReg rlo) (OpReg rolo),
                       XOR II32 (OpReg rohi) (OpReg rohi),
                       NEGI II32 (OpReg rolo),
                       SBB II32 (OpReg rhi) (OpReg rohi) ]
   return (RegCode64 ocode rohi rolo)

-- To multiply two 64-bit numbers we use the following decomposition (in C notation):
--
--     ((r1hi << 32) + r1lo) * ((r2hi << 32) + r2lo)
--      = ((r2lo * r1hi) << 32)
--      + ((r1lo * r2hi) << 32)
--      + r1lo * r2lo
--
-- Note that @(r1hi * r2hi) << 64@ can be dropped because it overflows completely.
--
-- The two cross products only contribute to the high word, so 32-bit IMUL
-- is enough for them.  @r1lo * r2lo@ is computed with MUL2 on %eax; its
-- result is then read from %edx (added to the high word) and %eax (the
-- low word).

iselExpr64 (CmmMachOp (MO_Mul _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   RegCode64 code2 r2hi r2lo <- iselExpr64 e2
   Reg64 rhi rlo <- getNewReg64
   tmp <- getNewRegNat II32
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV  II32 (OpReg r1lo) (OpReg eax),
                       MOV  II32 (OpReg r2lo) (OpReg tmp),
                       MOV  II32 (OpReg r1hi) (OpReg rhi),
                       IMUL II32 (OpReg tmp) (OpReg rhi),   -- rhi  = r1hi * r2lo
                       MOV  II32 (OpReg r2hi) (OpReg rlo),
                       IMUL II32 (OpReg eax) (OpReg rlo),   -- rlo  = r2hi * r1lo
                       ADD  II32 (OpReg rlo) (OpReg rhi),   -- rhi  = sum of cross products
                       MUL2 II32 (OpReg tmp),               -- r1lo * r2lo
                       ADD  II32 (OpReg edx) (OpReg rhi),
                       MOV  II32 (OpReg eax) (OpReg rlo)
                     ]
   return (RegCode64 code rhi rlo)

-- Overflow check for 64-bit signed multiplication.  The operands are
-- ignored and the constant 1 is written to both result words.
iselExpr64 (CmmMachOp (MO_S_MulMayOflo W64) _) = do
   -- Performance sensitive users won't use 32 bit so let's keep it simple:
   -- We always return a (usually false) positive.
   Reg64 rhi rlo <- getNewReg64
   let code = toOL   [
                       MOV II32 (OpImm (ImmInt 1)) (OpReg rhi),
                       MOV II32 (OpImm (ImmInt 1)) (OpReg rlo)
                     ]
   return (RegCode64 code rhi rlo)

-- To shift a 64-bit number to the left we use the SHLD and SHL instructions.
-- We use SHLD to shift the bits in @rhi@ to the left while copying
-- high bits from @rlo@ to fill the new space in the low bits of @rhi@.
-- That leaves @rlo@ unchanged, so we use SHL to shift the bits of @rlo@ left.
-- However, both these instructions only use the lowest 5 bits from %cl to do
-- their shifting. So if the sixth bit (0x20, i.e. 32) is set then we
-- additionally move the contents of @rlo@ to @rhi@ and clear @rlo@.

iselExpr64 (CmmMachOp (MO_Shl _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   code2 <- getAnyReg e2
   Reg64 rhi rlo <- getNewReg64
   lbl1 <- newBlockId
   lbl2 <- newBlockId
   let
        code =  code1 `appOL`
                code2 ecx `appOL`       -- shift amount goes straight into %ecx
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SHLD II32 (OpReg ecx) (OpReg rlo) (OpReg rhi),
                       SHL II32 (OpReg ecx) (OpReg rlo),
                       -- Is bit 5 of the shift amount set (amount >= 32)?
                       TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                       JXX EQQ lbl2,
                       JXX ALWAYS lbl1,
                       NEWBLOCK lbl1,
                       -- Fix-up for amounts >= 32.
                       MOV II32 (OpReg rlo) (OpReg rhi),
                       XOR II32 (OpReg rlo) (OpReg rlo),
                       JXX ALWAYS lbl2,
                       NEWBLOCK lbl2
                     ]
   return (RegCode64 code rhi rlo)

-- Similar to the left shift above, however now we're shifting to the right
-- and we're doing a signed shift.  If the sixth bit of %cl is set (so the
-- shift amount is 32 or more), @rlo@ takes the already shifted @rhi@ and
-- @rhi@ needs to be set to either 0 if it is non-negative or 0xffffffff
-- otherwise.  To accomplish that we arithmetically shift @rhi@ by 31.

iselExpr64 (CmmMachOp (MO_S_Shr _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   (r2, code2) <- getSomeReg e2
   Reg64 rhi rlo <- getNewReg64
   lbl1 <- newBlockId
   lbl2 <- newBlockId
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       MOV II32 (OpReg r2) (OpReg ecx),
                       SHRD II32 (OpReg ecx) (OpReg rhi) (OpReg rlo),
                       SAR II32 (OpReg ecx) (OpReg rhi),
                       -- Is bit 5 of the shift amount set (amount >= 32)?
                       TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                       JXX EQQ lbl2,
                       JXX ALWAYS lbl1,
                       NEWBLOCK lbl1,
                       -- Fix-up for amounts >= 32.
                       MOV II32 (OpReg rhi) (OpReg rlo),
                       SAR II32 (OpImm (ImmInt 31)) (OpReg rhi),
                       JXX ALWAYS lbl2,
                       NEWBLOCK lbl2
                     ]
   return (RegCode64 code rhi rlo)

-- Similar to the signed right shift, but for an unsigned (logical) shift:
-- SHR replaces SAR and, when the shift amount is 32 or more, the high
-- word is simply cleared after it has been moved into the low word.

iselExpr64 (CmmMachOp (MO_U_Shr _) [e1,e2]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   (r2, code2) <- getSomeReg e2
   Reg64 rhi rlo <- getNewReg64
   lbl1 <- newBlockId
   lbl2 <- newBlockId
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       MOV II32 (OpReg r2) (OpReg ecx),
                       SHRD II32 (OpReg ecx) (OpReg rhi) (OpReg rlo),
                       SHR II32 (OpReg ecx) (OpReg rhi),
                       -- Is bit 5 of the shift amount set (amount >= 32)?
                       TEST II32 (OpImm (ImmInt 32)) (OpReg ecx),
                       JXX EQQ lbl2,
                       JXX ALWAYS lbl1,
                       NEWBLOCK lbl1,
                       -- Fix-up for amounts >= 32.
                       MOV II32 (OpReg rhi) (OpReg rlo),
                       XOR II32 (OpReg rhi) (OpReg rhi),
                       JXX ALWAYS lbl2,
                       NEWBLOCK lbl2
                     ]
   return (RegCode64 code rhi rlo)

-- Bitwise binary operations act on the two words independently, so they
-- all share 'iselExpr64ParallelBin'.
iselExpr64 (CmmMachOp (MO_And _) [e1,e2]) = iselExpr64ParallelBin AND e1 e2
iselExpr64 (CmmMachOp (MO_Or  _) [e1,e2]) = iselExpr64ParallelBin OR  e1 e2
iselExpr64 (CmmMachOp (MO_Xor _) [e1,e2]) = iselExpr64ParallelBin XOR e1 e2

-- Bitwise complement: copy both words into fresh registers and apply NOT
-- to each of them.
iselExpr64 (CmmMachOp (MO_Not _) [e1]) = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   Reg64 rhi rlo <- getNewReg64
   let
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       NOT II32 (OpReg rlo),
                       NOT II32 (OpReg rhi)
                     ]
   return (RegCode64 code rhi rlo)

-- Register plus offset: rewrite into an explicit addition via
-- 'mangleIndexTree' and select code for that.
iselExpr64 (CmmRegOff r i) = iselExpr64 (mangleIndexTree r i)

-- Anything else is not supported: panic, printing the offending expression.
iselExpr64 expr
   = do
      platform <- getPlatform
      pprPanic "iselExpr64(i386)" (pdoc platform expr $+$ text (show expr))

-- | Select code for a 64-bit binary operation that can be applied to the
-- low and the high word independently (no carry between the words).
--
-- @op@ is the 32-bit instruction constructor to apply to each word pair;
-- the first operand is copied into fresh result registers and @op@ then
-- combines the second operand into them.
iselExpr64ParallelBin :: (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> CmmExpr -> NatM (RegCode64 (OrdList Instr))
iselExpr64ParallelBin op e1 e2 = do
   RegCode64 code1 r1hi r1lo <- iselExpr64 e1
   RegCode64 code2 r2hi r2lo <- iselExpr64 e2
   Reg64 rhi rlo <- getNewReg64
   let
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       op  II32 (OpReg r2lo) (OpReg rlo),
                       op  II32 (OpReg r2hi) (OpReg rhi)
                     ]
   return (RegCode64 code rhi rlo)


-- This is a helper data type which helps reduce the code duplication in
-- the code generation of arithmetic operations. It is not specifically
-- targeted at any particular type such as Int8 or Int32.
data VectorArithInstns = VA_Add | VA_Sub | VA_Mul | VA_Div | VA_Min | VA_Max

-- | Select code computing an expression into a 'Register'.
--
-- Looks up the platform and whether it is 32-bit, then defers to
-- 'getRegister''.
getRegister :: HasDebugCallStack => CmmExpr -> NatM Register
getRegister e = do platform <- getPlatform
                   is32Bit <- is32BitPlatform
                   getRegister' platform is32Bit e

-- | Worker for 'getRegister', taking the platform and the 32-bit flag
-- explicitly.
getRegister' :: HasDebugCallStack => Platform -> Bool -> CmmExpr -> NatM Register

-- Plain register read.
getRegister' platform is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal (GlobalRegUse PicBaseReg _)
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
          -- Every other register is used in place; no code is needed.
          let ty = cmmRegType reg
              reg_fmt = cmmTypeFormat ty
          in return $ Fixed reg_fmt (getRegisterReg platform reg) nilOL

-- Register plus offset: rewrite into an explicit addition via
-- 'mangleIndexTree' and try again.
getRegister' platform is32Bit (CmmRegOff r n)
  = getRegister' platform is32Bit $ mangleIndexTree r n

-- A relaxed read is compiled exactly like an ordinary load of the same
-- width.
-- NOTE(review): the alignment argument was truncated in the reviewed text;
-- 'NaturallyAligned' is assumed here -- confirm against the repository.
getRegister' platform is32Bit (CmmMachOp (MO_RelaxedRead w) [e])
  = getRegister' platform is32Bit (CmmLoad e (cmmBits w) NaturallyAligned)

-- Alignment check: select code for the operand, then let
-- 'addAlignmentCheck' wrap the resulting 'Register'.
getRegister' platform is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
  = addAlignmentCheck align <$> getRegister' platform is32Bit e

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

-- TO_W_(x >> 32): the result is simply the high word of x, so no shift
-- code is emitted.
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  RegCode64 code rhi _rlo <- iselExpr64 x
  return $ Fixed II32 rhi code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  RegCode64 code rhi _rlo <- iselExpr64 x
  return $ Fixed II32 rhi code

-- TO_W_(x) on 32-bit targets: truncating a 64-bit value to 32 bits just
-- selects its low word, for the unsigned and the signed conversion alike.
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  RegCode64 code _rhi rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  RegCode64 code _rhi rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

-- 64 -> 8 and 64 -> 16 bit truncation on 32-bit targets: take the low
-- word and zero-extend its low 8 (resp. 16) bits into a fresh register.
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W8) [x])
 | is32Bit = do
  RegCode64 code _rhi rlo <- iselExpr64 x
  ro <- getNewRegNat II8
  return $ Fixed II8 ro (code `appOL` toOL [ MOVZxL II8 (OpReg rlo) (OpReg ro) ])

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W16) [x])
 | is32Bit = do
  RegCode64 code _rhi rlo <- iselExpr64 x
  ro <- getNewRegNat II16
  return $ Fixed II16 ro (code `appOL` toOL [ MOVZxL II16 (OpReg rlo) (OpReg ro) ])

-- catch simple cases of zero- or sign-extended load
-- (8/16 -> 32 bits): the extension is folded into the load instruction
-- (MOVZxL for unsigned, MOVSxL for signed conversions).
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _ _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _ _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _ _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _ _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
-- (8/16/32 -> 64 bits); only applies when the target is not 32-bit.
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _ _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

-- 64-bit only: PicBaseReg plus a literal displacement is materialised with a
-- single LEA whose address operand is built by 'ripRel' from the literal, so
-- no separate add instruction is emitted.
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)),
                                     CmmLit displacement])
 | not is32Bit =
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

-- A MachOp applied to no arguments cannot be compiled; panic and include the
-- offending operator in the message.
getRegister' _ _ (CmmMachOp mop []) =
  pprPanic "getRegister(x86): nullary MachOp" (text $ show mop)

-- Unary MachOps.
--
-- Dispatches on the operator applied to the single argument @x@:
--
--   * negation / complement (float, integer, vector float),
--   * integer narrowings and same-width conversions, which emit no
--     conversion instruction of their own,
--   * float <-> word bitcasts,
--   * integer widenings (zero-, sign- or "don't care" extension),
--   * float <-> float and float <-> int conversions,
--   * vector broadcasts.
--
-- Every MachOp that takes two or three operands is listed explicitly and
-- rejected, instead of being swallowed by a wildcard alternative.
--
-- NOTE(review): the names 'sse4_1Enabled', 'incorrectOperands', 'INSERTPS',
-- 'PUNPCKLQDQ', 'PSHUFD', 'FmtInt64' and 'FmtInt32' were restored from the
-- surrounding type information of a damaged copy of this clause -- confirm
-- them against the definitions in scope before merging.
getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse4_1 <- sse4_1Enabled
    avx    <- avxEnabled
    case mop of
      MO_F_Neg w  -> sse2NegCode w x

      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_XX_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_XX_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x
      MO_XX_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_XX_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      MO_FW_Bitcast W32 -> bitcast FF32 II32 x
      MO_WF_Bitcast W32 -> bitcast II32 FF32 x
      MO_FW_Bitcast W64 -> bitcast FF64 II64 x
      MO_WF_Bitcast W64 -> bitcast II64 FF64 x
      MO_WF_Bitcast {}  -> incorrectOperands
      MO_FW_Bitcast {}  -> incorrectOperands

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we
      -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register
      -- has 8-bit version). So for 32-bit code, we'll just zero-extend.
      MO_XX_Conv W8  W32
          | is32Bit   -> integerExtend W8 W32 MOVZxL x
          | otherwise -> integerExtend W8 W32 MOV x
      MO_XX_Conv W8  W16
          | is32Bit   -> integerExtend W8 W16 MOVZxL x
          | otherwise -> integerExtend W8 W16 MOV x
      MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
      -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl.
      -- However, we don't want the register allocator to throw it
      -- away as an unnecessary reg-to-reg move, so we keep it in
      -- the form of a movzl and print it as a movl later.
      -- This doesn't apply to MO_XX_Conv since in this case we don't care about
      -- the upper bits. So we can just use MOV.
      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOV x
      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x

      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      -- Any conversion not matched above is not a supported width pair.
      MO_FF_Conv from to -> invalidConversion from to
      MO_UU_Conv from to -> invalidConversion from to
      MO_SS_Conv from to -> invalidConversion from to
      MO_XX_Conv from to -> invalidConversion from to

      MO_FS_Truncate from to -> coerceFP2Int from to x
      MO_SF_Round    from to -> coerceInt2FP from to x

      MO_VF_Neg l w  | avx       -> vector_float_negate_avx l w x
                     | otherwise -> vector_float_negate_sse l w x
      -- SIMD NCG TODO: add integer negation
      MO_VS_Neg {} -> needLlvm mop

      MO_VF_Broadcast l w
        | avx
        -> vector_float_broadcast_avx l w x
        | otherwise
        -> case w of
             W32 | not sse4_1
               -> sorry "32-bit float broadcast requires -msse4 or -fllvm."
             _ -> vector_float_broadcast_sse l w x
      MO_V_Broadcast l w
        -> vector_int_broadcast l w x

      -- Binary MachOps
      MO_Add {}    -> incorrectOperands
      MO_Sub {}    -> incorrectOperands
      MO_Eq {}     -> incorrectOperands
      MO_Ne {}     -> incorrectOperands
      MO_Mul {}    -> incorrectOperands
      MO_S_MulMayOflo {} -> incorrectOperands
      MO_S_Quot {} -> incorrectOperands
      MO_S_Rem {}  -> incorrectOperands
      MO_U_Quot {} -> incorrectOperands
      MO_U_Rem {}  -> incorrectOperands
      MO_S_Ge {}   -> incorrectOperands
      MO_S_Le {}   -> incorrectOperands
      MO_S_Gt {}   -> incorrectOperands
      MO_S_Lt {}   -> incorrectOperands
      MO_U_Ge {}   -> incorrectOperands
      MO_U_Le {}   -> incorrectOperands
      MO_U_Gt {}   -> incorrectOperands
      MO_U_Lt {}   -> incorrectOperands
      MO_F_Add {}  -> incorrectOperands
      MO_F_Sub {}  -> incorrectOperands
      MO_F_Mul {}  -> incorrectOperands
      MO_F_Quot {} -> incorrectOperands
      MO_F_Eq {}   -> incorrectOperands
      MO_F_Ne {}   -> incorrectOperands
      MO_F_Ge {}   -> incorrectOperands
      MO_F_Le {}   -> incorrectOperands
      MO_F_Gt {}   -> incorrectOperands
      MO_F_Lt {}   -> incorrectOperands
      MO_F_Min {}  -> incorrectOperands
      MO_F_Max {}  -> incorrectOperands
      MO_And {}    -> incorrectOperands
      MO_Or {}     -> incorrectOperands
      MO_Xor {}    -> incorrectOperands
      MO_Shl {}    -> incorrectOperands
      MO_U_Shr {}  -> incorrectOperands
      MO_S_Shr {}  -> incorrectOperands

      MO_V_Extract {}     -> incorrectOperands
      MO_V_Add {}         -> incorrectOperands
      MO_V_Sub {}         -> incorrectOperands
      MO_V_Mul {}         -> incorrectOperands
      MO_VS_Quot {}       -> incorrectOperands
      MO_VS_Rem {}        -> incorrectOperands
      MO_VU_Quot {}       -> incorrectOperands
      MO_VU_Rem {}        -> incorrectOperands
      MO_V_Shuffle {}     -> incorrectOperands
      MO_VF_Shuffle {}    -> incorrectOperands
      MO_VU_Min {}  -> incorrectOperands
      MO_VU_Max {}  -> incorrectOperands
      MO_VS_Min {}  -> incorrectOperands
      MO_VS_Max {}  -> incorrectOperands
      MO_VF_Min {}  -> incorrectOperands
      MO_VF_Max {}  -> incorrectOperands

      MO_VF_Extract {}    -> incorrectOperands
      MO_VF_Add {}        -> incorrectOperands
      MO_VF_Sub {}        -> incorrectOperands
      MO_VF_Mul {}        -> incorrectOperands
      MO_VF_Quot {}       -> incorrectOperands

      -- Ternary MachOps
      MO_FMA {}           -> incorrectOperands
      MO_VF_Insert {}     -> incorrectOperands
      MO_V_Insert {}      -> incorrectOperands

      --_other -> pprPanic "getRegister" (pprMachOp mop)
   where
        -- Apply a one-operand instruction to the clause's argument @x@.
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            -- A byte-wide source is fetched with getByteReg, anything wider
            -- with getSomeReg.
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)

        -- Reinterpret a value between the float and integer register
        -- classes: @fmt@ is the format handed to MOVD, @rfmt@ the format of
        -- the result register.
        bitcast :: Format -> Format -> CmmExpr -> NatM Register
        bitcast fmt rfmt expr =
          do (src, e_code) <- getSomeReg expr
             let code = \dst -> e_code `snocOL` (MOVD fmt (OpReg src) (OpReg dst))
             return (Any rfmt code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg :: Width -> CmmExpr -> NatM Register
        toI16Reg = toI8Reg -- for now

        -- Compile the operand as usual and merely relabel the format of the
        -- resulting register.
        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' platform is32Bit expr
                 return (swizzleRegisterRep e_code new_format)

        -- Negate every lane by XOR-ing with a vector whose lanes have only
        -- the top bit set (bit 31 for W32, bit 63 for W64).
        vector_float_negate_avx :: Length -> Width -> CmmExpr -> NatM Register
        vector_float_negate_avx l w expr = do
          let fmt :: Format
              mask :: CmmLit
              (fmt, mask) = case w of
                       W32 -> (VecFormat l FmtFloat , CmmInt (bit 31) w) -- TODO: these should be negative 0 floating point literals,
                       W64 -> (VecFormat l FmtDouble, CmmInt (bit 63) w) -- but we don't currently have those in Cmm.
                       _ -> panic "AVX floating-point negation: elements must be FF32 or FF64"
          (maskReg, maskCode) <- getSomeReg (CmmLit $ CmmVec $ replicate l mask)
          (reg, exp) <- getSomeReg expr
          let code dst = maskCode `appOL`
                         exp `snocOL`
                         (VMOVU fmt (OpReg reg) (OpReg dst)) `snocOL`
                         (VXOR fmt (OpReg maskReg) dst dst)
          return (Any fmt code)

        -- Same scheme as the AVX variant, using the two-operand MOVU / XOR
        -- instruction forms.
        vector_float_negate_sse :: Length -> Width -> CmmExpr -> NatM Register
        vector_float_negate_sse l w expr = do
          let fmt :: Format
              mask :: CmmLit
              (fmt, mask) = case w of
                       W32 -> (VecFormat l FmtFloat , CmmInt (bit 31) w) -- Same comment as for vector_float_negate_avx,
                       W64 -> (VecFormat l FmtDouble, CmmInt (bit 63) w) -- these should be -0.0 CmmFloat values.
                       _ -> panic "SSE floating-point negation: elements must be FF32 or FF64"
          (maskReg, maskCode) <- getSomeReg (CmmLit $ CmmVec $ replicate l mask)
          (reg, exp) <- getSomeReg expr
          let code dst = maskCode `appOL`
                         exp `snocOL`
                         (MOVU fmt (OpReg reg) (OpReg dst)) `snocOL`
                         (XOR  fmt (OpReg maskReg) (OpReg dst))
          return (Any fmt code)


        -- Broadcast a scalar float to every lane.  The register holding the
        -- scalar is shuffled in place and returned as a Fixed register.
        vector_float_broadcast_avx :: Length
                                   -> Width
                                   -> CmmExpr
                                   -> NatM Register
        vector_float_broadcast_avx len w expr = do
          (dst, exp) <- getSomeReg expr
          let fmt = VecFormat len (floatScalarFormat w)
              code = case w of
                       W64 -> unitOL $ VSHUF fmt (ImmInt 0) (OpReg dst) dst dst
                       _   -> toOL [ INSERTPS fmt (ImmInt 0b00_10_0000) (OpReg dst) dst
                                   , VSHUF fmt (ImmInt 0) (OpReg dst) dst dst ]
          return $ Fixed fmt dst (exp `appOL` code)

        -- SSE counterpart of 'vector_float_broadcast_avx', using the
        -- two-register SHUF form.
        vector_float_broadcast_sse :: Length
                                   -> Width
                                   -> CmmExpr
                                   -> NatM Register
        vector_float_broadcast_sse len w expr = do
          (dst, exp) <- getSomeReg expr
          let fmt = VecFormat len (floatScalarFormat w)
              code = case w of
                       W64 -> unitOL $ SHUF fmt (ImmInt 0) (OpReg dst) dst
                       _   -> toOL [ INSERTPS fmt (ImmInt 0b00_10_0000) (OpReg dst) dst
                                   , SHUF fmt (ImmInt 0) (OpReg dst) dst ]
          return $ Fixed fmt dst (exp `appOL` code)

        -- Broadcast a scalar integer: move it into the destination vector
        -- register with MOVD, then replicate it with one further instruction.
        -- Only 64- and 32-bit elements are handled.
        vector_int_broadcast :: Length
                             -> Width
                             -> CmmExpr
                             -> NatM Register
        vector_int_broadcast len W64 expr = do
          (reg, exp) <- getNonClobberedReg expr
          let fmt = VecFormat len FmtInt64
          return $ Any fmt (\dst -> exp `snocOL`
                                    (MOVD II64 (OpReg reg) (OpReg dst)) `snocOL`
                                    (PUNPCKLQDQ fmt (OpReg dst) dst)
                                    )
        vector_int_broadcast len W32 expr = do
          (reg, exp) <- getNonClobberedReg expr
          let fmt = VecFormat len FmtInt32
          return $ Any fmt (\dst -> exp `snocOL`
                                    (MOVD II32 (OpReg reg) (OpReg dst)) `snocOL`
                                    (PSHUFD fmt (ImmInt 0x00) (OpReg dst) dst)
                                    )
        vector_int_broadcast _ _ _ =
          sorry "Unsupported Integer vector broadcast operation; please use -fllvm."

getRegister' Platform
platform Bool
is32Bit (CmmMachOp MachOp
mop [CmmExpr
x, CmmExpr
y]) = do -- dyadic MachOps
  avx <- NatM Bool
  case mop of
      MO_F_Eq Width
_ -> Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg Bool
is32Bit Cond
EQQ CmmExpr
x CmmExpr
      MO_F_Ne Width
_ -> Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg Bool
is32Bit Cond
NE  CmmExpr
x CmmExpr
      MO_F_Gt Width
_ -> Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg Bool
is32Bit Cond
GTT CmmExpr
x CmmExpr
      MO_F_Ge Width
_ -> Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg Bool
is32Bit Cond
GE  CmmExpr
x CmmExpr
      -- Invert comparison condition and swap operands
      -- See Note [SSE Parity Checks]
      MO_F_Lt Width
_ -> Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg Bool
is32Bit Cond
GTT  CmmExpr
y CmmExpr
      MO_F_Le Width
_ -> Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg Bool
is32Bit Cond
GE   CmmExpr
y CmmExpr

      MO_Eq Width
_   -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
EQQ CmmExpr
x CmmExpr
      MO_Ne Width
_   -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
NE  CmmExpr
x CmmExpr

      MO_S_Gt Width
_ -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
GTT CmmExpr
x CmmExpr
      MO_S_Ge Width
_ -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
GE  CmmExpr
x CmmExpr
      MO_S_Lt Width
_ -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
LTT CmmExpr
x CmmExpr
      MO_S_Le Width
_ -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
LE  CmmExpr
x CmmExpr

      MO_U_Gt Width
_ -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
GU  CmmExpr
x CmmExpr
      MO_U_Ge Width
_ -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
GEU CmmExpr
x CmmExpr
      MO_U_Lt Width
_ -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
LU  CmmExpr
x CmmExpr
      MO_U_Le Width
_ -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg Cond
LEU CmmExpr
x CmmExpr

      MO_F_Add  Width
w -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
trivialFCode_sse2 Width
w Format -> Operand -> Operand -> Instr
ADD  CmmExpr
x CmmExpr
      MO_F_Sub  Width
w -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
trivialFCode_sse2 Width
w Format -> Operand -> Operand -> Instr
SUB  CmmExpr
x CmmExpr
      MO_F_Quot Width
w -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
trivialFCode_sse2 Width
w Format -> Operand -> Operand -> Instr
FDIV CmmExpr
x CmmExpr
      MO_F_Mul  Width
w -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
trivialFCode_sse2 Width
w Format -> Operand -> Operand -> Instr
MUL  CmmExpr
x CmmExpr
      MO_F_Min  Width
w -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
trivialFCode_sse2 Width
w (MinOrMax -> MinMaxType -> Format -> Operand -> Operand -> Instr
Min MinMaxType
FloatMinMax) CmmExpr
x CmmExpr
      MO_F_Max  Width
w -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
trivialFCode_sse2 Width
w (MinOrMax -> MinMaxType -> Format -> Operand -> Operand -> Instr
Max MinMaxType
FloatMinMax) CmmExpr
x CmmExpr

      MO_Add Width
rep -> Width -> CmmExpr -> CmmExpr -> NatM Register
add_code Width
rep CmmExpr
x CmmExpr
      MO_Sub Width
rep -> Width -> CmmExpr -> CmmExpr -> NatM Register
sub_code Width
rep CmmExpr
x CmmExpr

      MO_S_Quot Width
rep -> Width -> Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
div_code Width
rep Bool
True  Bool
True  CmmExpr
x CmmExpr
      MO_S_Rem  Width
rep -> Width -> Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
div_code Width
rep Bool
True  Bool
False CmmExpr
x CmmExpr
      MO_U_Quot Width
rep -> Width -> Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
div_code Width
rep Bool
False Bool
True  CmmExpr
x CmmExpr
      MO_U_Rem  Width
rep -> Width -> Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
div_code Width
rep Bool
False Bool
False CmmExpr
x CmmExpr

      MO_S_MulMayOflo Width
rep -> Width -> CmmExpr -> CmmExpr -> NatM Register
imulMayOflo Width
rep CmmExpr
x CmmExpr

      MO_Mul Width
W8  -> CmmExpr -> CmmExpr -> NatM Register
imulW8 CmmExpr
x CmmExpr
      MO_Mul Width
rep -> Width -> (Format -> Operand -> Operand -> Instr) -> NatM Register
triv_op Width
rep Format -> Operand -> Operand -> Instr
      MO_And Width
rep -> Width -> (Format -> Operand -> Operand -> Instr) -> NatM Register
triv_op Width
rep Format -> Operand -> Operand -> Instr
      MO_Or  Width
rep -> Width -> (Format -> Operand -> Operand -> Instr) -> NatM Register
triv_op Width
rep Format -> Operand -> Operand -> Instr
      MO_Xor Width
rep -> Width -> (Format -> Operand -> Operand -> Instr) -> NatM Register
triv_op Width
rep Format -> Operand -> Operand -> Instr

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
      MO_Shl Width
rep   -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
shift_code Width
rep Format -> Operand -> Operand -> Instr
SHL CmmExpr
x CmmExpr
y {-False-}
      MO_U_Shr Width
rep -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
shift_code Width
rep Format -> Operand -> Operand -> Instr
SHR CmmExpr
x CmmExpr
y {-False-}
      MO_S_Shr Width
rep -> Width
-> (Format -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
shift_code Width
rep Format -> Operand -> Operand -> Instr
SAR CmmExpr
x CmmExpr
y {-False-}

      MO_VF_Shuffle Int
l Width
w [Int]
        | Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
* Width -> Int
widthInBits Width
w Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
        -> if
            | Bool
            -> Int -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
vector_shuffle_float Int
l Width
w CmmExpr
x CmmExpr
y [Int]
            | Bool
            -> String -> NatM Register
forall a. HasCallStack => String -> a
sorry String
"Please enable the -mavx flag"
        | Bool
        -> String -> NatM Register
forall a. HasCallStack => String -> a
sorry String
"Please use -fllvm for wide shuffle instructions"

      MO_VF_Extract Int
l Width
W32   | Bool
avx       -> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_extract Int
l Width
W32 CmmExpr
x CmmExpr
                            | Bool
otherwise -> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_extract_sse Int
l Width
W32 CmmExpr
x CmmExpr
      MO_VF_Extract Int
l Width
W64               -> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_extract Int
l Width
W64 CmmExpr
x CmmExpr
      MO_VF_Extract {} -> NatM Register
forall a. NatM a

      MO_V_Extract Int
l Width
W64                -> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_int_extract_sse Int
l Width
W64 CmmExpr
x CmmExpr
      -- SIMD NCG TODO: W32, W16, W8
      MO_V_Extract {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp

      MO_VF_Add Int
l Width
w         | Bool
avx       -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_avx VectorArithInstns
VA_Add Int
l Width
w CmmExpr
x CmmExpr
                            | Bool
otherwise -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_sse VectorArithInstns
VA_Add Int
l Width
w CmmExpr
x CmmExpr

      MO_VF_Sub Int
l Width
w         | Bool
avx       -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_avx VectorArithInstns
VA_Sub Int
l Width
w CmmExpr
x CmmExpr
                            | Bool
otherwise -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_sse VectorArithInstns
VA_Sub Int
l Width
w CmmExpr
x CmmExpr

      MO_VF_Mul Int
l Width
w         | Bool
avx       -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_avx VectorArithInstns
VA_Mul Int
l Width
w CmmExpr
x CmmExpr
                            | Bool
otherwise -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_sse VectorArithInstns
VA_Mul Int
l Width
w CmmExpr
x CmmExpr

      MO_VF_Quot Int
l Width
w        | Bool
avx       -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_avx VectorArithInstns
VA_Div Int
l Width
w CmmExpr
x CmmExpr
                            | Bool
otherwise -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_sse VectorArithInstns
VA_Div Int
l Width
w CmmExpr
x CmmExpr

      MO_VF_Min Int
l Width
w         | Bool
avx       -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_avx VectorArithInstns
VA_Min Int
l Width
w CmmExpr
x CmmExpr
                            | Bool
otherwise -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_sse VectorArithInstns
VA_Min Int
l Width
w CmmExpr
x CmmExpr

      MO_VF_Max Int
l Width
w         | Bool
avx       -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_avx VectorArithInstns
VA_Max Int
l Width
w CmmExpr
x CmmExpr
                            | Bool
otherwise -> VectorArithInstns
-> Int -> Width -> CmmExpr -> CmmExpr -> NatM Register
vector_float_op_sse VectorArithInstns
VA_Max Int
l Width
w CmmExpr
x CmmExpr

      -- SIMD NCG TODO: integer vector operations
      MO_V_Shuffle {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_V_Add {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_V_Sub {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_V_Mul {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_VS_Quot {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_VS_Rem {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_VU_Quot {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_VU_Rem {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp

      MO_VU_Min {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_VU_Max {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_VS_Min {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp
      MO_VS_Max {} -> MachOp -> NatM Register
forall a. MachOp -> NatM a
needLlvm MachOp

      -- Unary MachOps
      MO_S_Neg {} -> NatM Register
forall a. NatM a
      MO_F_Neg {} -> NatM Register
forall a. NatM a
      MO_Not {} -> NatM Register
forall a. NatM a
      MO_SF_Round {} -> NatM Register
forall a. NatM a
      MO_FS_Truncate {} -> NatM Register
forall a. NatM a
      MO_SS_Conv {} -> NatM Register
forall a. NatM a
      MO_XX_Conv {} -> NatM Register
forall a. NatM a
      MO_FF_Conv {} -> NatM Register
forall a. NatM a
      MO_UU_Conv {} -> NatM Register
forall a. NatM a
      MO_WF_Bitcast {} -> NatM Register
forall a. NatM a
      MO_FW_Bitcast  {} -> NatM Register
forall a. NatM a
      MO_RelaxedRead {} -> NatM Register
forall a. NatM a
      MO_AlignmentCheck {} -> NatM Register
forall a. NatM a
      MO_VS_Neg {} -> NatM Register
forall a. NatM a
      MO_VF_Neg {} -> NatM Register
forall a. NatM a
      MO_V_Broadcast {} -> NatM Register
forall a. NatM a
      MO_VF_Broadcast {} -> NatM Register
forall a. NatM a

      -- Ternary MachOps
      MO_FMA {} -> NatM Register
forall a. NatM a
      MO_V_Insert {} -> NatM Register
forall a. NatM a
      MO_VF_Insert {} -> NatM Register
forall a. NatM a

    -- Integer binary operation handled by 'trivialCode'.
    --
    -- The instruction constructor is specialised to the integer format of
    -- @width@ and handed to 'trivialCode' both as the instruction and as the
    -- optional ('Maybe') second instruction argument. The operands @x@ and @y@
    -- are the ones bound by the enclosing equation.
    triv_op :: Width -> (Format -> Operand -> Operand -> Instr) -> NatM Register
    triv_op width instr = trivialCode width op (Just op) x y
                        where op   = instr (intFormat width)

    -- Special case for IMUL for bytes, since the result of IMULB will be in
    -- %ax, the split to %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider
    -- values.
    --
    -- The first operand is evaluated into a register that the second
    -- operand's code does not clobber; the second operand is evaluated
    -- straight into eax, and the one-operand IMUL2 is applied to the first.
    -- The result is reported as fixed in eax.
    imulW8 :: CmmExpr -> CmmExpr -> NatM Register
    imulW8 arg_a arg_b = do
        (a_reg, a_code) <- getNonClobberedReg arg_a
        b_code <- getAnyReg arg_b

        let code = a_code `appOL` b_code eax `appOL`
                   toOL [ IMUL2 format (OpReg a_reg) ]
            format = intFormat W8

        return (Fixed format eax code)

    -- Signed multiply overflow check. The generated code leaves a value in
    -- eax (returned as a 'Fixed' register) that is zero when the product of
    -- the two operands fits in the given width.
    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo W8 a b = do
         -- The general case (W16, W32, W64) doesn't work for W8 as its
         -- multiplication doesn't use two registers.
         --
         -- The plan is:
         -- 1. truncate and sign-extend a and b to 8bit width
         -- 2. multiply a' = a * b in 32bit width
         -- 3. copy and sign-extend 8bit from a' to c
         -- 4. compare a' and c: they are equal if there was no overflow
         (a_reg, a_code) <- getNonClobberedReg a
         (b_reg, b_code) <- getNonClobberedReg b
         let
             code = a_code `appOL` b_code `appOL`
                        toOL [
                           MOVSxL II8 (OpReg a_reg) (OpReg a_reg),
                           MOVSxL II8 (OpReg b_reg) (OpReg b_reg),
                           IMUL II32 (OpReg b_reg) (OpReg a_reg),
                           MOVSxL II8 (OpReg a_reg) (OpReg eax),
                           CMP II16 (OpReg a_reg) (OpReg eax),
                           SETCC NE (OpReg eax)
                        ]
         return (Fixed II8 eax code)
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             -- Arithmetic shift by (width - 1) replicates the sign bit of the
             -- low half across the whole register.
             shift_amt  = case rep of
                           W16 -> 15
                           W32 -> 31
                           W64 -> 63
                           w -> panic ("shift_amt: " ++ show w)

             format = intFormat rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB format (OpReg edx) (OpReg eax)
                                -- compare against upper
                           -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed format eax code)

    -- Code for the shift operations (SHL / SHR / SAR are passed in as the
    -- instruction constructor). Two cases are distinguished by the shape of
    -- the shift amount: a literal, or an arbitrary expression.
    shift_code :: Width
               -> (Format -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit)
      -- Handle the case of a shift larger than the width of the shifted value.
      -- This is necessary since x86 applies a mask of 0x1f to the shift
      -- amount, meaning that, e.g., `shr 47, $eax` will actually shift by
      -- `47 & 0x1f == 15`. See #20626.
      | CmmInt n _ <- lit
      , n >= fromIntegral (widthInBits width)
      = getRegister $ CmmLit $ CmmInt 0 width

      | otherwise = do
          x_code <- getAnyReg x
          let
               format = intFormat width
               code dst
                  = x_code dst `snocOL`
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
      * in the case of C calls the use of ecx here can interfere with arguments.
        We avoid this with the hack described in Note [Evaluate C-call
        arguments before placing in destination registers]
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let format = intFormat width
        tmp <- getNewRegNat format
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)

    -- Integer addition. Adding a literal that passes 'is32BitInteger' is
    -- delegated to 'add_int' (an LEA), except at byte width; everything else
    -- goes through 'trivialCode' with ADD.
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y
        , rep /= W8 -- LEA doesn't support byte size (#18614)
        = add_int rep x y
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    -- Integer subtraction. Subtracting a literal whose negation passes
    -- 'is32BitInteger' is turned into an addition of the negated literal via
    -- 'add_int', except at byte width; everything else goes through
    -- 'trivialCode' with SUB and no second instruction argument ('Nothing').
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y)
        , rep /= W8 -- LEA doesn't support byte size (#18614)
        = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y

    -- our three-operand add instruction:
    --
    -- Evaluates @x@ into some register and emits a single LEA that computes
    -- (that register + the immediate @y@) into whichever destination register
    -- is chosen later (the result is an 'Any' register).
    add_int :: Width -> CmmExpr -> Integer -> NatM Register
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            format = intFormat width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA format
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        return (Any format code)


    -- See Note [DIV/IDIV for bytes]
    --
    -- Integer division / remainder.
    --
    -- Arguments: operand width, whether the operation is signed, whether the
    -- quotient (True) or the remainder (False) is wanted, and the two
    -- operands.
    div_code :: Width -> Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
    div_code W8 signed quotient x y = do
        -- Byte-sized division is performed at W16: both operands are widened
        -- with the conversion matching the signedness.
        let widen | signed    = MO_SS_Conv W8 W16
                  | otherwise = MO_UU_Conv W8 W16
        div_code
            W16
            signed
            quotient
            (CmmMachOp widen [x])
            (CmmMachOp widen [y])

    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             format = intFormat width
             -- Prepare edx before dividing: CLTD for the signed case, zeroing
             -- edx (XOR with itself) for the unsigned case.
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr format y_op]

             -- The quotient is read from eax, the remainder from edx.
             result | quotient  = eax
                    | otherwise = edx

           return (Fixed format result code)

    -- Vector operations---

    -- Element-wise floating-point vector arithmetic using the three-operand
    -- (AVX) instruction forms. Both operands are evaluated into registers and
    -- a single instruction writes the result to the destination register.
    -- Panics for element widths other than W32 and W64.
    vector_float_op_avx :: VectorArithInstns
                        -> Length
                        -> Width
                        -> CmmExpr
                        -> CmmExpr
                        -> NatM Register
    vector_float_op_avx op l w expr1 expr2 = do
      (reg1, exp1) <- getSomeReg expr1
      (reg2, exp2) <- getSomeReg expr2
      let format   = case w of
                       W32 -> VecFormat l FmtFloat
                       W64 -> VecFormat l FmtDouble
                       _ -> pprPanic "Floating-point AVX vector operation not supported at this width"
                             (text "width:" <+> ppr w)
          code dst = case op of
              VA_Add -> arithInstr VADD
              VA_Sub -> arithInstr VSUB
              VA_Mul -> arithInstr VMUL
              VA_Div -> arithInstr VDIV
              VA_Min -> arithInstr (VMINMAX Min FloatMinMax)
              VA_Max -> arithInstr (VMINMAX Max FloatMinMax)
            where
              -- opcode src2 src1 dst <==> dst = src1 `opcode` src2
              arithInstr :: (Format -> Operand -> Reg -> Reg -> Instr) -> InstrBlock
              arithInstr instr = exp1 `appOL` exp2 `snocOL`
                                 (instr format (OpReg reg2) reg1 dst)
      return (Any format code)

    -- Element-wise floating-point vector arithmetic using the two-operand
    -- (SSE) instruction forms. Because those instructions overwrite one of
    -- their inputs, the first operand is first copied into the destination
    -- register and the operation is then applied in place.
    -- Panics for element widths other than W32 and W64.
    vector_float_op_sse :: VectorArithInstns
                        -> Length
                        -> Width
                        -> CmmExpr
                        -> CmmExpr
                        -> NatM Register
    vector_float_op_sse op l w expr1 expr2 = do
      (reg1, exp1) <- getSomeReg expr1
      (reg2, exp2) <- getSomeReg expr2
      let format   = case w of
                       W32 -> VecFormat l FmtFloat
                       W64 -> VecFormat l FmtDouble
                       _ -> pprPanic "Floating-point SSE vector operation not supported at this width"
                             (text "width:" <+> ppr w)
          code dst = case op of
              VA_Add -> arithInstr ADD
              VA_Sub -> arithInstr SUB
              VA_Mul -> arithInstr MUL
              VA_Div -> arithInstr FDIV
              VA_Min -> arithInstr (MINMAX Min FloatMinMax)
              VA_Max -> arithInstr (MINMAX Max FloatMinMax)
            where
              -- opcode src2 src1 <==> src1 = src1 `opcode` src2
              arithInstr :: (Format -> Operand -> Operand -> Instr) -> InstrBlock
              arithInstr instr
                = exp1 `appOL` exp2 `snocOL`
                  (MOVU format (OpReg reg1) (OpReg dst)) `snocOL`
                  (instr format (OpReg reg2) (OpReg dst))
      return (Any format code)
    -- Extract one element of a floating-point vector (AVX path).
    --
    -- Only a literal index is supported. Index 0 is a plain scalar move out
    -- of the vector register; other indices use a shuffle (W32) or a
    -- high-to-low move (W64, index 1 only). Anything else panics.
    vector_float_extract :: Length
                         -> Width
                         -> CmmExpr
                         -> CmmExpr
                         -> NatM Register
    vector_float_extract l W32 expr (CmmLit lit) = do
      (r, exp) <- getSomeReg expr
      let format   = VecFormat l FmtFloat
          imm      = litToImm lit
          code dst
            = case lit of
                CmmInt 0 _ -> exp `snocOL` (MOV FF32 (OpReg r) (OpReg dst))
                CmmInt _ _ -> exp `snocOL` (VPSHUFD format imm (OpReg r) dst)
                _          -> pprPanic "Unsupported AVX floating-point vector extract offset" (ppr lit)
      return (Any FF32 code)
    vector_float_extract l W64 expr (CmmLit lit) = do
      (r, exp) <- getSomeReg expr
      let format   = VecFormat l FmtDouble
          code dst
            = case lit of
                CmmInt 0 _ -> exp `snocOL`
                              (MOV FF64 (OpReg r) (OpReg dst))
                CmmInt 1 _ -> exp `snocOL`
                              (MOVHLPS format r dst)
                _          -> pprPanic "Unsupported AVX floating-point vector extract offset" (ppr lit)
      return (Any FF64 code)
    vector_float_extract _ w c e =
      pprPanic "Unsupported AVX floating-point vector extract" (pdoc platform c $$ pdoc platform e $$ ppr w)

    -- Extract one element of a W32 floating-point vector (SSE path).
    --
    -- Only a literal index is supported: index 0 is a move of the vector
    -- register into the destination, any other integer index uses a shuffle
    -- with the index as immediate. All other argument shapes panic.
    vector_float_extract_sse :: Length
                             -> Width
                             -> CmmExpr
                             -> CmmExpr
                             -> NatM Register
    vector_float_extract_sse l W32 expr (CmmLit lit)
      = do
      (r,exp) <- getSomeReg expr
      let format   = VecFormat l FmtFloat
          imm      = litToImm lit
          code dst
            = case lit of
                CmmInt 0 _ -> exp `snocOL` (MOVU format (OpReg r) (OpReg dst))
                CmmInt _ _ -> exp `snocOL` (PSHUFD format imm (OpReg r) dst)
                _          -> pprPanic "Unsupported SSE floating-point vector extract offset" (ppr lit)
      return (Any FF32 code)
    vector_float_extract_sse _ w c e
      = pprPanic "Unsupported SSE floating-point vector extract" (pdoc platform c $$ pdoc platform e $$ ppr w)

    -- Extract one element of a two-element W64 integer vector (SSE path).
    --
    -- Only a literal index is supported: index 0 is moved directly into the
    -- integer destination; index 1 is first moved from the high half into a
    -- fresh vector temporary and then into the destination. Any other
    -- argument shape panics.
    vector_int_extract_sse :: Length
                           -> Width
                           -> CmmExpr
                           -> CmmExpr
                           -> NatM Register
    vector_int_extract_sse l@2 W64 expr (CmmLit lit)
      = do
      (r, exp) <- getSomeReg expr
      let fmt = VecFormat l FmtInt64
      tmp <- getNewRegNat fmt
      let code dst =
            case lit of
              CmmInt 0 _ -> exp `snocOL`
                            (MOVD II64 (OpReg r) (OpReg dst))
              CmmInt 1 _ -> exp `snocOL`
                            (MOVHLPS fmt r tmp) `snocOL`
                            (MOVD II64 (OpReg tmp) (OpReg dst))
              _          -> panic "Error in offset while unpacking"
      return (Any II64 code)
    vector_int_extract_sse _ w c e
      -- The message names the integer extract; it previously said
      -- "floating-point", which pointed at the wrong helper.
      = pprPanic "Unsupported SSE integer vector extract" (pdoc platform c $$ pdoc platform e $$ ppr w)

    -- Shuffle two floating-point vectors.
    --
    -- Both vector expressions are computed into registers, and the actual
    -- shuffle sequence is delegated to 'shuffleInstructions'. The vector
    -- format has @l@ lanes; the lane type is FmtFloat when the width is W32
    -- and FmtDouble for any other width.
    vector_shuffle_float :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
    vector_shuffle_float l w v1 v2 is = do
      (r1, exp1) <- getSomeReg v1
      (r2, exp2) <- getSomeReg v2
      let fmt = VecFormat l (if w == W32 then FmtFloat else FmtDouble)
          -- Code for the first operand, then the second, then the shuffle.
          code dst
            = exp1 `appOL` (exp2 `appOL` shuffleInstructions fmt r1 r2 is dst)
      return (Any fmt code)

    -- Instruction sequence that shuffles the lanes of @v1@ and @v2@ into
    -- @dst@ according to the index list @is@.
    --
    -- Indices address the concatenation of the two inputs: for an n-lane
    -- format, indices @0 .. n-1@ select lanes of @v1@ and indices
    -- @n .. 2n-1@ select lanes of @v2@.
    --
    -- Supported formats:
    --
    --   * @VecFormat 2 FmtDouble@: every pair of indices in @0..3@ maps to
    --     a single instruction (VSHUF, or VMOVU when the result is simply
    --     one of the inputs).
    --   * @VecFormat 4 FmtFloat@: a single VSHUF when the first two and the
    --     last two result lanes each come from one input; otherwise a
    --     fall-back sequence of four INSERTPS instructions.
    --
    -- Panics on an unsupported format, a wrong number of indices, or an
    -- index out of range.
    shuffleInstructions :: Format -> Reg -> Reg -> [Int] -> Reg -> OrdList Instr
    shuffleInstructions fmt v1 v2 is dst =
      case fmt of
        VecFormat 2 FmtDouble ->
          case is of
            [i1, i2] -> case (i1, i2) of
              -- Both lanes from the same input.
              (0,0) -> unitOL (VSHUF fmt (ImmInt 0b00) (OpReg v1) v1 dst)
              (1,1) -> unitOL (VSHUF fmt (ImmInt 0b11) (OpReg v1) v1 dst)
              (2,2) -> unitOL (VSHUF fmt (ImmInt 0b00) (OpReg v2) v2 dst)
              (3,3) -> unitOL (VSHUF fmt (ImmInt 0b11) (OpReg v2) v2 dst)
              -- Identity on one of the inputs: a plain move suffices.
              (0,1) -> unitOL (VMOVU fmt (OpReg v1) (OpReg dst))
              (2,3) -> unitOL (VMOVU fmt (OpReg v2) (OpReg dst))
              -- Swap the two lanes of a single input.
              (1,0) -> unitOL (VSHUF fmt (ImmInt 0b01) (OpReg v1) v1 dst)
              (3,2) -> unitOL (VSHUF fmt (ImmInt 0b01) (OpReg v2) v2 dst)
              -- One lane from each input.
              (0,2) -> unitOL (VSHUF fmt (ImmInt 0b00) (OpReg v2) v1 dst)
              (2,0) -> unitOL (VSHUF fmt (ImmInt 0b00) (OpReg v1) v2 dst)
              (0,3) -> unitOL (VSHUF fmt (ImmInt 0b10) (OpReg v2) v1 dst)
              (3,0) -> unitOL (VSHUF fmt (ImmInt 0b01) (OpReg v1) v2 dst)
              (1,2) -> unitOL (VSHUF fmt (ImmInt 0b01) (OpReg v2) v1 dst)
              (2,1) -> unitOL (VSHUF fmt (ImmInt 0b10) (OpReg v1) v2 dst)
              (1,3) -> unitOL (VSHUF fmt (ImmInt 0b11) (OpReg v2) v1 dst)
              (3,1) -> unitOL (VSHUF fmt (ImmInt 0b11) (OpReg v1) v2 dst)
              _ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 3" (ppr is)
            _ -> pprPanic "vector shuffle: wrong number of indices (expected 2)" (ppr is)
        VecFormat 4 FmtFloat
          -- indices 0 <= i <= 7
          | all ( (>= 0) <&&> (<= 7) ) is ->
          case is of
            [i1, i2, i3, i4]
              -- The immediate packs one 2-bit lane selector per result lane
              -- (result lane k occupies bits 2k and 2k+1).

              -- All four lanes from v1.
              | all ( <= 3 ) is
              , let imm = i1 + i2 `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
              -> unitOL (VSHUF fmt (ImmInt imm) (OpReg v1) v1 dst)
              -- All four lanes from v2.
              | all ( >= 4 ) is
              , let [j1, j2, j3, j4] = map ( subtract 4 ) is
                    imm = j1 + j2 `shiftL` 2 + j3 `shiftL` 4 + j4 `shiftL` 6
              -> unitOL (VSHUF fmt (ImmInt imm) (OpReg v2) v2 dst)
              -- Low two lanes from v1, high two lanes from v2.
              | i1 <= 3, i2 <= 3
              , i3 >= 4, i4 >= 4
              , let imm = i1 + i2 `shiftL` 2 + (i3 - 4) `shiftL` 4 + (i4 - 4) `shiftL` 6
              -> unitOL (VSHUF fmt (ImmInt imm) (OpReg v2) v1 dst)
              -- Low two lanes from v2, high two lanes from v1.
              | i1 >= 4, i2 >= 4
              , i3 <= 3, i4 <= 3
              , let imm = (i1 - 4) + (i2 - 4) `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
              -> unitOL (VSHUF fmt (ImmInt imm) (OpReg v1) v2 dst)
              | otherwise
              ->
              -- Fall-back code with 4 INSERTPS operations.
              -- SIMD NCG TODO: handle more cases with better lowering.
              let -- bits: ss_dd_zzzz
                  -- ss: pick source location
                  -- dd: pick destination location
                  -- zzzz: pick locations to be zeroed
                  insertImm src dst = shiftL   ( src `mod` 4 ) 6
                                    .|. shiftL dst 4
                  -- Which input register a (0..7) index refers to.
                  vec src = if src >= 4 then v2 else v1
              in unitOL
                -- The first insert also sets the zero mask 0b1110, so the
                -- three lanes not yet written start out as zero.
                (INSERTPS fmt (ImmInt $ insertImm i1 0 .|. 0b1110) (OpReg $ vec i1) dst)
                `snocOL`
                (INSERTPS fmt (ImmInt $ insertImm i2 1) (OpReg $ vec i2) dst)
                `snocOL`
                (INSERTPS fmt (ImmInt $ insertImm i3 2) (OpReg $ vec i3) dst)
                `snocOL`
                (INSERTPS fmt (ImmInt $ insertImm i4 3) (OpReg $ vec i4) dst)
            _ -> pprPanic "vector shuffle: wrong number of indices (expected 4)" (ppr is)
          | otherwise
          -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 7" (ppr is)
        _ ->
          pprPanic "vector shuffle: unsupported format" (ppr fmt)

-- Ternary MachOps: fused multiply-add and vector element insertion.
--
-- For the insert operations, @x@ is the vector, @y@ the scalar value to
-- insert and @z@ the lane index (which must be a literal for the supported
-- cases). Anything not handled here panics or reports 'sorry'.
getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
  avx    <- avxEnabled
  sse4_1 <- sse4_1Enabled
  case mop of
      -- Floating point fused multiply-add operations @ ± x*y ± z@
      MO_FMA var l w
        | l * widthInBits w > 256
        -> sorry "Please use -fllvm for wide vector FMA support"
        | otherwise
        -> genFMA3Code l w var x y z

      -- Ternary vector operations
      MO_VF_Insert l W32  | sse4_1 -> vector_float_insert_sse l x y z
                          | otherwise
                          -> sorry "FloatX4# operations require either -msse4 or -fllvm"
      MO_VF_Insert l W64  -> vector_double_insert avx l x y z
      MO_V_Insert l W64   -> vector_int_insert_sse l W64 x y z

      _other -> pprPanic "getRegister(x86) - ternary CmmMachOp (1)"
                  (pprMachOp mop)

  where
    -- Insert a Float into a FloatX4 vector at a literal lane index, using a
    -- single INSERTPS on a copy of the vector.
    --
    -- SIMD NCG TODO:
    --   - add support for FloatX8, FloatX16.
    vector_float_insert_sse :: Length
                            -> CmmExpr
                            -> CmmExpr
                            -> CmmExpr
                            -> NatM Register
    -- FloatX4
    vector_float_insert_sse len@4 vecExpr valExpr (CmmLit (CmmInt offset _))
      = do
      (r, exp)    <- getNonClobberedReg valExpr
      fn          <- getAnyReg vecExpr
      let fmt      = VecFormat len FmtFloat
          -- The lane index is shifted into bit 4 and up of the immediate.
          imm      = litToImm (CmmInt (offset `shiftL` 4) W32)
          -- Compute the value, materialise the vector in dst, then insert.
          code dst = exp `appOL`
                     (fn dst) `snocOL`
                     (INSERTPS fmt imm (OpReg r) dst)
       in return $ Any fmt code
    vector_float_insert_sse len _ _ offset
      = pprPanic "Unsupported vector insert operation" $
          vcat
            [ text "FloatX" <> ppr len <> text "#"
            , text "offset:" <+> pdoc platform offset ]

    -- Insert a Double into a DoubleX2 vector at literal lane index 0 or 1.
    -- The vector is first copied into dst (VMOVU when AVX is enabled, MOVU
    -- otherwise) and then one lane is overwritten.
    --
    -- SIMD NCG TODO:
    --   - add support for FloatX8, FloatX16.
    vector_double_insert :: Bool
                         -> Length
                         -> CmmExpr
                         -> CmmExpr
                         -> CmmExpr
                         -> NatM Register
    -- DoubleX2
    vector_double_insert avx len@2 vecExpr valExpr (CmmLit offset)
      = do
        (valReg, valExp) <- getNonClobberedReg valExpr
        (vecReg, vecExp) <- getSomeReg vecExpr -- NB: vector regs never clobbered by instruction
        let movu = if avx then VMOVU else MOVU
            fmt = VecFormat len FmtDouble
            code dst
              = case offset of
                  CmmInt 0 _ -> valExp `appOL`
                                vecExp `snocOL`
                                (movu (VecFormat 2 FmtDouble) (OpReg vecReg) (OpReg dst)) `snocOL`
                                (MOV (VecFormat 2 FmtDouble) (OpReg valReg) (OpReg dst))
                  CmmInt 1 _ -> valExp `appOL`
                                vecExp `snocOL`
                                (movu (VecFormat 2 FmtDouble) (OpReg vecReg) (OpReg dst)) `snocOL`
                                (SHUF fmt (ImmInt 0b00) (OpReg valReg) dst)
                  _ -> pprPanic "MO_VF_Insert DoubleX2: unsupported offset" (ppr offset)
         in return $ Any fmt code
    vector_double_insert _ _ _ _ _ =
      sorry "Unsupported floating-point vector insert operation; please use -fllvm"
    -- For DoubleX4: use VSHUFPD.
    -- For DoubleX8: use something like vinsertf64x2 followed by vpblendd?

    -- Insert a 64-bit integer into an Int64X2 vector at literal lane index
    -- 0 or 1, going through a temporary vector register and finishing with
    -- PUNPCKLQDQ to combine the two halves in dst.
    --
    -- SIMD NCG TODO:
    --   - only supports Int64X2, add support for everything else:
    --     (Int32X{4,2}, Int16X{8,4,2}, Int8X{16,8,4,2})
    vector_int_insert_sse :: HasCallStack => Length
                          -> Width
                          -> CmmExpr
                          -> CmmExpr
                          -> CmmExpr
                          -> NatM Register
    -- Int64X2
    vector_int_insert_sse len@2 W64 vecExpr valExpr (CmmLit offset)
      = do
        (valReg, valExp) <- getNonClobberedReg valExpr
        (vecReg, vecExp) <- getSomeReg vecExpr -- NB: vector regs never clobbered by instruction
        let fmt = VecFormat len FmtInt64
        tmp <- getNewRegNat fmt
        let code dst
              = case offset of
                  -- tmp := high half of the vector; dst := new value;
                  -- then combine the two.
                  CmmInt 0 _ -> valExp `appOL`
                                vecExp `snocOL`
                                (MOVHLPS fmt vecReg tmp) `snocOL`
                                (MOVD II64 (OpReg valReg) (OpReg dst)) `snocOL`
                                (PUNPCKLQDQ fmt (OpReg tmp) dst)
                  -- dst := (low half of) the vector; tmp := new value;
                  -- then combine the two.
                  CmmInt 1 _ -> valExp `appOL`
                                vecExp `snocOL`
                                (MOV II64 (OpReg vecReg) (OpReg dst)) `snocOL`
                                (MOVD II64 (OpReg valReg) (OpReg tmp)) `snocOL`
                                (PUNPCKLQDQ fmt (OpReg tmp) dst)
                  _ -> pprPanic "MO_V_Insert Int64X2: unsupported offset" (ppr offset)
         in return $ Any fmt code
    vector_int_insert_sse _ _ _ _ _ =
      sorry "Unsupported integer vector insert operation; please use -fllvm"

-- No MachOp with four or more arguments is supported: panic, showing the
-- offending operation.
getRegister' _ _ (CmmMachOp mop (_:_:_:_:_)) =
  pprPanic "getRegister(x86): MachOp with >= 4 arguments" (text $ show mop)

-- Memory loads. The instruction used depends on the type being loaded:
--
--   * vector types: the move chosen by 'movInstr' for the load's format;
--   * floating-point types: 'loadAmode' at the matching float format;
--   * on 32-bit, integers other than 64-bit words: MOV, except that 8-bit
--     loads are zero-extended (see the comment below);
--   * on 64-bit: a plain MOV at the load's format.
--
-- The remaining case (a 64-bit word on a 32-bit target) panics here.
getRegister' platform is32Bit load@(CmmLoad mem ty _)
  | isVecType ty
  = do
    config <- getConfig
    Amode addr mem_code <- getAmode mem
    let code dst =
          mem_code `snocOL`
            movInstr config format (OpAddr addr) (OpReg dst)
    return (Any format code)
  | isFloatType ty
  = do
    Amode addr mem_code <- getAmode mem
    loadAmode (floatFormat width) addr mem_code

  | is32Bit && not (isWord64 ty)
  = do
    let
      instr = case width of
                W8     -> MOVZxL II8
                  -- We always zero-extend 8-bit loads, if we
                  -- can't think of anything better.  This is because
                  -- we can't guarantee access to an 8-bit variant of every register
                  -- (esi and edi don't have 8-bit variants), so to make things
                  -- simpler we do our 8-bit arithmetic with full 32-bit registers.
                _other -> MOV format
    code <- intLoadCode instr mem
    return (Any format code)

  | not is32Bit
  -- Simpler memory load code on x86_64
  = do
    code <- intLoadCode (MOV format) mem
    return (Any format code)

  | otherwise
  = pprPanic "getRegister(x86) CmmLoad" (pdoc platform load)
  where
    -- Format and width of the loaded value, derived from its Cmm type.
    format = cmmTypeFormat ty
    width = typeWidth ty

-- Handle symbol references with LEA and %rip-relative addressing.
-- See Note [%rip-relative addressing on x86-64].
-- (x86-64 only) A label literal is loaded with a single LEA whose address
-- operand is the label's immediate relative to %rip. When the guards fail
-- (32-bit target, or not a label), matching falls through to the general
-- literal equation.
getRegister' platform is32Bit (CmmLit lit)
  | is_label lit
  , not is32Bit
  = do let format = cmmTypeFormat (cmmLitType platform lit)
           imm = litToImm lit
           op = OpAddr (AddrBaseIndex EABaseRip EAIndexNone imm)
           code dst = unitOL (LEA format op (OpReg dst))
       return (Any format code)
  where
    -- Literals that denote a symbol address (possibly with an offset).
    is_label (CmmLabel {})        = True
    is_label (CmmLabelOff {})     = True
    is_label (CmmLabelDiffOff {}) = True
    is_label _                    = False

-- Load a literal into a register. The strategies below are tried in order;
-- the first one whose guard holds is used.
getRegister' platform is32Bit (CmmLit lit) = do
  avx <- avxEnabled

  -- NB: it is important that the code produced here (to load a literal into
  -- a register) doesn't clobber any registers other than the destination
  -- register; the code for generating C calls relies on this property.
  -- In particular, we have:
  -- > loadIntoRegMightClobberOtherReg (CmmLit _) = False
  -- which means that we assume that loading a literal into a register
  -- will not clobber any other registers.

  -- TODO: this function mishandles floating-point negative zero,
  -- because -0.0 == 0.0 returns True and because we represent CmmFloat as
  -- Rational, which can't properly represent negative zero.

  if
    -- Zero: use XOR.
    | isZeroLit lit
    -> let code dst
             | isIntFormat fmt
             = let fmt'
                     | is32Bit
                     = fmt
                     | otherwise
                     -- x86_64: 32-bit xor is one byte shorter,
                     -- and zero-extends to 64 bits
                     = case fmt of
                         II64 -> II32
                         _ -> fmt
               in unitOL (XOR fmt' (OpReg dst) (OpReg dst))
             | avx
             = if float_or_floatvec
               then unitOL (VXOR fmt (OpReg dst) dst dst)
               else unitOL (VPXOR fmt dst dst dst)
             | otherwise
             = if float_or_floatvec
               then unitOL (XOR fmt (OpReg dst) (OpReg dst))
               else unitOL (PXOR fmt (OpReg dst) dst)
       in return $ Any fmt code

    -- Constant vector: use broadcast.
    | VecFormat l sFmt <- fmt
    , CmmVec (f:fs) <- lit
    , all (== f) fs
    -> do let w = scalarWidth sFmt
              broadcast = if isFloatScalarFormat sFmt
                          then MO_VF_Broadcast l w
                          else MO_V_Broadcast l w
          valCode <- getAnyReg (CmmMachOp broadcast [CmmLit f])
          return $ Any fmt valCode

    -- Optimisation for loading small literals on x86_64: take advantage
    -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
    -- instruction forms are shorter.
    | not is32Bit, isWord64 cmmTy, not (isBigLit lit)
    -> let imm = litToImm lit
           code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
       in return (Any II64 code)

    -- Scalar integer: use an immediate.
    | isIntFormat fmt
    -> let imm = litToImm lit
           code dst = unitOL (MOV fmt (OpImm imm) (OpReg dst))
       in return (Any fmt code)

    -- General case: load literal from data address.
    | otherwise
    -> do let w = formatToWidth fmt
          Amode addr addr_code <- memConstant (mkAlignment $ widthInBytes w) lit
          loadAmode fmt addr addr_code

    where
      cmmTy = cmmLitType platform lit
      fmt = cmmTypeFormat cmmTy
      float_or_floatvec = isFloatOrFloatVecFormat fmt

      -- Is the literal (or every element of a vector literal) zero?
      isZeroLit (CmmInt i _) = i == 0
      isZeroLit (CmmFloat f _) = f == 0 -- TODO: mishandles negative zero
      isZeroLit (CmmVec fs) = all isZeroLit fs
      isZeroLit _ = False

      -- Does an integer literal fall outside the unsigned 32-bit range?
      isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
      isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model. See Note [%rip-relative addressing on x86-64].

-- Stack slots cannot be turned into registers here: panic, printing the
-- offending expression.
getRegister' platform _ slot@(CmmStackSlot {}) =
  pprPanic "getRegister(x86) CmmStackSlot" (pdoc platform slot)

-- | Build the code for loading an integer value from memory.
--
-- The address expression is turned into an 'Amode'; the result is a function
-- that, given a destination register, yields the address computation
-- followed by @instr@ applied to the memory operand and that register.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- | Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
--
-- The result is a function from the chosen destination register to the
-- code that leaves the value of the expression in that register.
getAnyReg :: HasDebugCallStack => CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

-- | Turn a 'Register' into code that can target an arbitrary destination.
--
-- An 'Any' register already is such a function.  For a 'Fixed' register the
-- value lives in @reg@, so a register-to-register move into the requested
-- destination is appended to the code that computes it.
anyReg :: HasDebugCallStack => Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = do
  config <- getConfig
  return (\dst -> fcode `snocOL` mkRegRegMoveInstr config rep reg dst)

-- | A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
--
-- Returns the register holding the value together with the code computing it.
getByteReg :: HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  config <- getConfig
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    -- A virtual register can be used as-is.
                    | isVirtualReg reg -> return (reg, code)
                    -- A real register is copied into a fresh temporary.
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` mkRegRegMoveInstr config rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- | Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
--
-- Returns the register holding the value together with the code computing it.
getNonClobberedReg :: HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  r <- getRegister expr
  config <- getConfig
  let platform = ncgPlatform config
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs platform
        -> do
                -- Copy the value out of the clobberable register.
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` mkRegRegMoveInstr config rep reg tmp)
        | otherwise ->
                return (reg, code)


-- | Convert a 'CmmExpr' representing a memory address into an 'Amode'.
-- An 'Amode' is a datatype representing a valid address form for the target
-- (e.g. "Base + Index + disp" or immediate) and the code to compute it.
--
-- The alternatives are tried in order; the last one is a catch-all that
-- evaluates the whole expression into a register and uses it as the base.
getAmode :: CmmExpr -> NatM Amode
getAmode e = do
   platform <- getPlatform
   let is32Bit = target32Bit platform

   case e of
      CmmRegOff r n
         -> getAmode $ mangleIndexTree r n

      CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)), CmmLit displacement]
         | not is32Bit
         -> return $ Amode (ripRel (litToImm displacement)) nilOL

      -- This is all just ridiculous, since it carefully undoes
      -- what mangleIndexTree has just done.
      CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)]
         | is32BitLit platform lit
         -- assert (rep == II32)???
         -> do
            (x_reg, x_code) <- getSomeReg x
            let off = ImmInt (-(fromInteger i))
            return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

      CmmMachOp (MO_Add _rep) [x, CmmLit lit]
         | is32BitLit platform lit
         -- assert (rep == II32)???
         -> do
            (x_reg, x_code) <- getSomeReg x
            let off = litToImm lit
            return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

      -- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
      -- recognised by the next rule.
      CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _), b@(CmmLit _)]
         -> getAmode (CmmMachOp (MO_Add rep) [b, a])

      -- Matches: (x + offset) + (y << shift)
      CmmMachOp (MO_Add _) [CmmRegOff x offset, CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)]]
         | isScaleShift shift
         -> x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

      -- Matches: x + (y << shift)
      CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)]]
         | isScaleShift shift
         -> x86_complex_amode x y shift 0

      -- Matches: x + ((y << shift) + offset)
      CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Add _) [CmmMachOp (MO_Shl _)
                                                    [y, CmmLit (CmmInt shift _)], CmmLit (CmmInt offset _)]]
         -- Both conditions must hold.  Written without parentheses as
         -- @s == 0 || ... || s == 3 && is32BitInteger offset@, the (&&)
         -- binds tighter than (||), so the offset would only be checked
         -- when the shift is 3.
         | isScaleShift shift && is32BitInteger offset
         -> x86_complex_amode x y shift offset

      CmmMachOp (MO_Add _) [x, y]
         | not (isLit y) -- we already handle valid literals above.
         -> x86_complex_amode x y 0 0

      CmmLit lit@(CmmFloat {})
        -> pprPanic "X86 CodeGen: attempt to use floating-point value as a memory address"
             (ppr lit)

      -- Handle labels with %rip-relative addressing since in general the image
      -- may be loaded anywhere in the 64-bit address space (e.g. on Windows
      -- with high-entropy ASLR). See Note [%rip-relative addressing on x86-64].
      CmmLit lit
         | not is32Bit
         , is_label lit
         -> return (Amode (AddrBaseIndex EABaseRip EAIndexNone (litToImm lit)) nilOL)

      CmmLit lit
         | is32BitLit platform lit
         -> return (Amode (ImmAddr (litToImm lit) 0) nilOL)

      -- Literal with offsets too big (> 32 bits) fails during the linking phase
      -- (#15570). We already handled valid literals above so we don't have to
      -- test anything here.
      CmmLit (CmmLabelOff l off)
         -> getAmode (CmmMachOp (MO_Add W64) [ CmmLit (CmmLabel l)
                                             , CmmLit (CmmInt (fromIntegral off) W64)
                                             ])
      CmmLit (CmmLabelDiffOff l1 l2 off w)
         -> getAmode (CmmMachOp (MO_Add W64) [ CmmLit (CmmLabelDiffOff l1 l2 0 w)
                                             , CmmLit (CmmInt (fromIntegral off) W64)
                                             ])

      -- in case we can't do something better, we just compute the expression
      -- and put the result in a register
      _ -> do
        (reg,code) <- getSomeReg e
        return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
  where
    -- Shift amounts accepted by 'x86_complex_amode' (index scale 1, 2, 4, 8).
    isScaleShift :: Integer -> Bool
    isScaleShift s = s == 0 || s == 1 || s == 2 || s == 3

    -- Literals that refer to a label, with or without an offset.
    is_label :: CmmLit -> Bool
    is_label (CmmLabel{}) = True
    is_label (CmmLabelOff{}) = True
    is_label (CmmLabelDiffOff{}) = True
    is_label _ = False

-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register). This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: CmmExpr -> NatM Amode
getSimpleAmode addr = is32BitPlatform >>= \case
  False -> getAmode addr
  True  -> do
    -- Evaluate the whole address into one fresh word-sized register and
    -- address memory through that register alone.
    addr_code <- getAnyReg addr
    config <- getConfig
    addr_r <- getNewRegNat (intFormat (ncgWordWidth config))
    let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
    return $! Amode amode (addr_code addr_r)

-- | Build a @base + index * scale + offset@ addressing mode.
--
-- @shift@ is the left-shift applied to the index and must be 0, 1, 2 or 3
-- (scale 1, 2, 4 or 8); any other value panics.  The offset is converted
-- with 'fromIntegral' and used as the displacement.
x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           -- Named 'scale' rather than 'base' so that it does not shadow
           -- the function's first parameter.
           scale = case shift of
                     0 -> 1
                     1 -> 2
                     2 -> 4
                     3 -> 8
                     n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg scale) (ImmInt (fromIntegral offset)))
               code)

-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

-- | Produce an operand for the expression, plus the code to set it up.
-- See the section comment above for what \"non-clobbered\" means.
--
-- Literals become memory constants (suitable floats) or immediates (integer
-- literals accepted by 'is32BitLit'); loads become memory operands where
-- possible; anything else is evaluated into a non-clobbered register.
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit)
  -- Matching on the constructor here avoids a partial let-pattern;
  -- 'isSuitableFloatingPointLit' only ever accepts 'CmmFloat' literals.
  | CmmFloat _ w <- lit
  , isSuitableFloatingPointLit lit
  = do
    Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
    return (OpAddr addr, code)
  | otherwise
  = do
    platform <- getPlatform
    if is32BitLit platform lit && isIntFormat (cmmTypeFormat (cmmLitType platform lit))
      then return (OpImm (litToImm lit), nilOL)
      else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem ty _) = do
  is32Bit <- is32BitPlatform
  -- this logic could be simplified
  if not (is32Bit && isWord64 ty)
      -- if 32bit and ty is at float/double/simd value
      -- or if 64bit
      --  this could use some eyeballs or i'll need to stare at it more later
    then do
      platform <- ncgPlatform <$> getConfig
      Amode src mem_code <- getAmode mem
      (src', save_code) <-
        if amodeCouldBeClobbered platform src
          then do
            -- The address mentions a register that may be clobbered:
            -- compute the address into a fresh temporary with LEA and
            -- address memory through that temporary instead.
            tmp <- getNewRegNat (archWordFormat is32Bit)
            return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                    unitOL (LEA (archWordFormat is32Bit)
                                (OpAddr src)
                                (OpReg tmp)))
          else
            return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else
      -- if its a word or gcptr on 32bit?
      getNonClobberedOperand_generic (CmmLoad mem ty NaturallyAligned)

getNonClobberedOperand e = getNonClobberedOperand_generic e

-- | Fallback for 'getNonClobberedOperand': evaluate the expression into a
-- register obtained from 'getNonClobberedReg' and use it as the operand.
getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
  (reg, code) <- getNonClobberedReg e
  return (OpReg reg, code)

-- | Does the addressing mode mention any register for which
-- 'regClobbered' holds?
amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

-- | A real register counts as clobberable when 'freeReg' reports it as
-- free on this platform; every other register does not.
regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
regClobbered _ _ = False

-- | getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
--
-- Literals become memory constants (suitable floats) or immediates (integer
-- literals accepted by 'is32BitLit'); integer loads become memory operands;
-- anything else is evaluated into a register.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand (CmmLit lit)
  -- Matching on the constructor here avoids a partial let-pattern;
  -- 'isSuitableFloatingPointLit' only ever accepts 'CmmFloat' literals.
  | CmmFloat _ w <- lit
  , isSuitableFloatingPointLit lit
  = do
    Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
    return (OpAddr addr, code)
  | otherwise
  = do
    platform <- getPlatform
    if is32BitLit platform lit && isIntFormat (cmmTypeFormat (cmmLitType platform lit))
      then return (OpImm (litToImm lit), nilOL)
      else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem ty _) = do
  is32Bit <- is32BitPlatform
  -- Integer loads can be used straight from memory, except 64-bit values
  -- on a 32-bit target.
  if isIntFormat (cmmTypeFormat ty) && not (is32Bit && isWord64 ty)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else
       getOperand_generic (CmmLoad mem ty NaturallyAligned)

getOperand e = getOperand_generic e

-- | Fallback for 'getOperand': evaluate the expression into some register
-- and use that register as the operand.
getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

-- | Is the expression a load, or a literal that is either accepted by
-- 'is32BitLit' or a suitable floating-point literal?
isOperand :: Platform -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _ _) = True
isOperand platform (CmmLit lit)
                          = is32BitLit platform lit
                          || isSuitableFloatingPointLit lit
isOperand _ _            = False

-- | Given a 'Register', produce a new 'Register' with an instruction block
-- which will check the value for alignment. Used for @-falignment-sanitisation@.
--
-- The check is appended after the code that computes the value, for both
-- 'Fixed' and 'Any' registers.
addAlignmentCheck :: Int -> Register -> Register
addAlignmentCheck align reg =
    case reg of
      Fixed fmt r code -> Fixed fmt r (code `appOL` check fmt r)
      Any fmt f        -> Any fmt (\r -> f r `appOL` check fmt r)
  where
    -- TEST the register against (align - 1) and jump to the bad-alignment
    -- label when the result is non-zero.  Asserts an integer format.
    check :: Format -> Reg -> InstrBlock
    check fmt r =
        assert (isIntFormat fmt) $
        toOL [ TEST fmt (OpImm $ ImmInt $ align - 1) (OpReg r)
             , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
             ]

-- | Place a literal in a read-only data section under a fresh label and
-- return an 'Amode' that addresses it.
--
-- On 32-bit targets the address is obtained by running 'getAmode' on a
-- dynamic reference to the label; otherwise the label is addressed
-- %rip-relative.  The returned code carries the 'LDATA' directive ahead of
-- any address computation.
memConstant :: Alignment -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  let rosection = Section ReadOnlyData lbl
  config <- getConfig
  platform <- getPlatform
  (addr, addr_code) <- if target32Bit platform
                       then do dynRef <- cmmMakeDynamicReference
                                             config
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA rosection (align, CmmStaticsRaw lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)

-- | Load the value at the given address into any register.
--
-- The resulting 'Any' register runs @addr_code@ and then a move, built with
-- 'movInstr' for the given format, from the address into the destination.
loadAmode :: Format -> AddrMode -> InstrBlock -> NatM Register
loadAmode fmt addr addr_code = do
  config <- getConfig
  let load dst = movInstr config fmt (OpAddr addr) (OpReg dst)
  return $ Any fmt (\ dst -> addr_code `snocOL` load dst)

-- | if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
--
-- True exactly for non-zero 'CmmFloat' literals.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

-- | Produce either a memory operand or a register operand for the
-- expression.
--
-- An integer load (other than a 64-bit one on a 32-bit target) is used
-- directly as a memory operand; everything else is evaluated into a
-- register obtained from 'getNonClobberedReg'.
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem ty _) = do
  is32Bit <- is32BitPlatform
  if isIntFormat (cmmTypeFormat ty) && not (is32Bit && isWord64 ty)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else do
       (reg, code) <- getNonClobberedReg e
       return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

-- | Can the literal be used where a 32-bit value is required?
--
-- Always true on 32-bit targets.  Otherwise 64-bit integer literals are
-- checked with 'is32BitInteger', and label literals depend on whether the
-- image is known to be loaded low (see @low_image@ below).
is32BitLit :: Platform -> CmmLit -> Bool
is32BitLit platform _lit
   | target32Bit platform = True
is32BitLit platform lit =
   case lit of
      CmmInt i W64              -> is32BitInteger i
      -- Except on Windows, assume that labels are in the range 0-2^31-1: this
      -- assumes the small memory model. Note [%rip-relative addressing on
      -- x86-64].
      CmmLabel _                -> low_image
      -- however we can't assume that label offsets are in this range
      -- (see #15570)
      CmmLabelOff _ off         -> low_image && is32BitInteger (fromIntegral off)
      CmmLabelDiffOff _ _ off _ -> low_image && is32BitInteger (fromIntegral off)
      _                         -> True
  where
    -- Is the executable image certain to be located below 4GB? As noted in
    -- Note [%rip-relative addressing on x86-64], this is not true on Windows.
    low_image :: Bool
    low_image =
      case platformOS platform of
        OSMinGW32 -> False   -- See Note [%rip-relative addressing on x86-64]
        _         -> True

-- | Set up a condition code for a conditional branch.
--
-- Only binary 'CmmMachOp' comparisons are accepted; any other expression
-- panics.  32- and 64-bit floating-point comparisons go through
-- 'condFltCode', everything else through 'condIntCode' with the condition
-- given by 'machOpToCond'.
getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      -- Invert comparison condition and swap operands
      -- See Note [SSE Parity Checks]
      MO_F_Lt W32 -> condFltCode GTT  y x
      MO_F_Le W32 -> condFltCode GE   y x

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      -- As for W32: inverted condition, swapped operands.
      MO_F_Lt W64 -> condFltCode GTT y x
      MO_F_Le W64 -> condFltCode GE  y x

      _ -> condIntCode (machOpToCond mop) x y

getCondCode other = do
   platform <- getPlatform
   pprPanic "getCondCode(2)(x86,x86_64)" (pdoc platform other)

-- | Map an integer comparison 'MachOp' to the corresponding 'Cond'.
-- The operand width is ignored.  Panics on any other 'MachOp'.
machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
  MO_Eq _   -> EQQ
  MO_Ne _   -> NE
  -- signed comparisons
  MO_S_Gt _ -> GTT
  MO_S_Ge _ -> GE
  MO_S_Lt _ -> LTT
  MO_S_Le _ -> LE
  -- unsigned comparisons
  MO_U_Gt _ -> GU
  MO_U_Ge _ -> GEU
  MO_U_Lt _ -> LU
  MO_U_Le _ -> LEU
  _other -> pprPanic "machOpToCond" (pprMachOp mo)

{-  Note [64-bit integer comparisons on 32-bit]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    When doing these comparisons there are 2 kinds of
    comparisons.

    * Comparison for equality (or lack thereof)

    We use xor to check if high/low bits are
    equal. Then combine the results using or.

    * Other comparisons:

    We first compare the low registers
    and use a subtraction with borrow to compare the high registers.

    For signed numbers the condition is determined by
    the sign and overflow flags agreeing or not
    and for unsigned numbers the condition is the carry flag.

-}

-- | @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.
--
-- Fetches the platform and defers to @condIntCode'@.
condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do platform <- getPlatform
                          condIntCode' platform cond x y

condIntCode' :: Platform -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- 64-bit integer comparisons on 32-bit
-- See Note [64-bit integer comparisons on 32-bit]
condIntCode' :: Platform -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode' Platform
platform Cond
cond CmmExpr
x CmmExpr
  | Platform -> Bool
target32Bit Platform
platform Bool -> Bool -> Bool
&& CmmType -> Bool
isWord64 (Platform -> CmmExpr -> CmmType
cmmExprType Platform
platform CmmExpr
x) = do

  RegCode64 code1 r1hi r1lo <- HasDebugCallStack => CmmExpr -> NatM (RegCode64 InstrBlock)
CmmExpr -> NatM (RegCode64 InstrBlock)
iselExpr64 CmmExpr
  RegCode64 code2 r2hi r2lo <- iselExpr64 y

  -- we mustn't clobber r1/r2 so we use temporaries
  tmp1 <- getNewRegNat II32
  tmp2 <- getNewRegNat II32

  let (cond', cmpCode) = intComparison cond r1hi r1lo r2hi r2lo tmp1 tmp2
  return $ CondCode False cond' (code1 `appOL` code2 `appOL` cmpCode)

    intComparison :: Cond
-> Reg -> Reg -> Reg -> Reg -> Reg -> Reg -> (Cond, InstrBlock)
intComparison Cond
cond Reg
r1_hi Reg
r1_lo Reg
r2_hi Reg
r2_lo Reg
tmp1 Reg
tmp2 =
      case Cond
cond of
        -- These don't occur as argument of condIntCode'
ALWAYS  -> String -> (Cond, InstrBlock)
forall a. HasCallStack => String -> a
panic String
NEG     -> String -> (Cond, InstrBlock)
forall a. HasCallStack => String -> a
panic String
POS     -> String -> (Cond, InstrBlock)
forall a. HasCallStack => String -> a
panic String
CARRY   -> String -> (Cond, InstrBlock)
forall a. HasCallStack => String -> a
panic String
OFLO    -> String -> (Cond, InstrBlock)
forall a. HasCallStack => String -> a
panic String
PARITY  -> String -> (Cond, InstrBlock)
forall a. HasCallStack => String -> a
panic String
NOTPARITY -> String -> (Cond, InstrBlock)
forall a. HasCallStack => String -> a
panic String
        -- Special case #1 x == y and x != y
EQQ -> (Cond
EQQ, InstrBlock
NE  -> (Cond
NE, InstrBlock
        -- [x >= y]
GE  -> (Cond
GE, InstrBlock
GEU -> (Cond
GEU, InstrBlock
        -- [x >  y]
GTT -> (Cond
LTT, InstrBlock
GU  -> (Cond
LU, InstrBlock
        -- [x <= y]
LE  -> (Cond
GE, InstrBlock
LEU -> (Cond
GEU, InstrBlock
        -- [x <  y]
LTT -> (Cond
LTT, InstrBlock
LU  -> (Cond
LU, InstrBlock
        cmpExact :: OrdList Instr
        cmpExact :: InstrBlock
cmpExact =
          [Instr] -> InstrBlock
forall a. [a] -> OrdList a
            [ Format -> Operand -> Operand -> Instr
MOV Format
II32 (Reg -> Operand
OpReg Reg
r1_hi) (Reg -> Operand
OpReg Reg
            , Format -> Operand -> Operand -> Instr
MOV Format
II32 (Reg -> Operand
OpReg Reg
r1_lo) (Reg -> Operand
OpReg Reg
            , Format -> Operand -> Operand -> Instr
XOR Format
II32 (Reg -> Operand
OpReg Reg
r2_hi) (Reg -> Operand
OpReg Reg
            , Format -> Operand -> Operand -> Instr
XOR Format
II32 (Reg -> Operand
OpReg Reg
r2_lo) (Reg -> Operand
OpReg Reg
            , Format -> Operand -> Operand -> Instr
OR  Format
II32 (Reg -> Operand
OpReg Reg
tmp1)  (Reg -> Operand
OpReg Reg
        cmpGE :: InstrBlock
cmpGE = [Instr] -> InstrBlock
forall a. [a] -> OrdList a
            [ Format -> Operand -> Operand -> Instr
MOV Format
II32 (Reg -> Operand
OpReg Reg
r1_hi) (Reg -> Operand
OpReg Reg
            , Format -> Operand -> Operand -> Instr
CMP Format
II32 (Reg -> Operand
OpReg Reg
r2_lo) (Reg -> Operand
OpReg Reg
            , Format -> Operand -> Operand -> Instr
SBB Format
II32 (Reg -> Operand
OpReg Reg
r2_hi) (Reg -> Operand
OpReg Reg
        cmpLE :: InstrBlock
cmpLE = [Instr] -> InstrBlock
forall a. [a] -> OrdList a
            [ Format -> Operand -> Operand -> Instr
MOV Format
II32 (Reg -> Operand
OpReg Reg
r2_hi) (Reg -> Operand
OpReg Reg
            , Format -> Operand -> Operand -> Instr
CMP Format
II32 (Reg -> Operand
OpReg Reg
r1_lo) (Reg -> Operand
OpReg Reg
            , Format -> Operand -> Operand -> Instr
SBB Format
II32 (Reg -> Operand
OpReg Reg
r1_hi) (Reg -> Operand
OpReg Reg

-- memory vs immediate
condIntCode' Platform
platform Cond
cond (CmmLoad CmmExpr
x CmmType
ty AlignmentSpec
_) (CmmLit CmmLit
 | Platform -> CmmLit -> Bool
is32BitLit Platform
platform CmmLit
lit = do
    Amode x_addr x_code <- CmmExpr -> NatM Amode
getAmode CmmExpr
        imm  = CmmLit -> Imm
litToImm CmmLit
        code = InstrBlock
x_code InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
                  Format -> Operand -> Operand -> Instr
CMP (CmmType -> Format
cmmTypeFormat CmmType
ty) (Imm -> Operand
OpImm Imm
imm) (AddrMode -> Operand
OpAddr AddrMode
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' Platform
platform Cond
cond (CmmMachOp (MO_And Width
_) [CmmExpr
o2]) (CmmLit (CmmInt Integer
0 Width
    | (CmmLit lit :: CmmLit
lit@(CmmInt Integer
mask Width
_)) <- CmmExpr
o2, Platform -> CmmLit -> Bool
is32BitLit Platform
platform CmmLit
    = do
      (x_reg, x_code) <- CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg CmmExpr
         code = InstrBlock
x_code InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
                Format -> Operand -> Operand -> Instr
TEST (Width -> Format
intFormat Width
ty) (Imm -> Operand
OpImm (Integer -> Imm
ImmInteger Integer
mask)) (Reg -> Operand
OpReg Reg
      return (CondCode False cond code)

-- anything vs zero
condIntCode' Platform
_ Cond
cond CmmExpr
x (CmmLit (CmmInt Integer
0 Width
ty)) = do
    (x_reg, x_code) <- CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg CmmExpr
        code = InstrBlock
x_code InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
                  Format -> Operand -> Operand -> Instr
TEST (Width -> Format
intFormat Width
ty) (Reg -> Operand
OpReg Reg
x_reg) (Reg -> Operand
OpReg Reg
    return (CondCode False cond code)

-- anything vs operand
condIntCode' Platform
platform Cond
cond CmmExpr
x CmmExpr
 | Platform -> CmmExpr -> Bool
isOperand Platform
platform CmmExpr
y = do
    (x_reg, x_code) <- HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg CmmExpr
    (y_op,  y_code) <- getOperand y
        code = InstrBlock
x_code InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
`appOL` InstrBlock
y_code InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
                  Format -> Operand -> Operand -> Instr
CMP (CmmType -> Format
cmmTypeFormat (Platform -> CmmExpr -> CmmType
cmmExprType Platform
platform CmmExpr
x)) Operand
y_op (Reg -> Operand
OpReg Reg
    return (CondCode False cond code)
-- operand vs. anything: invert the comparison so that we can use a
-- single comparison instruction.
 | Platform -> CmmExpr -> Bool
isOperand Platform
platform CmmExpr
 , Just Cond
revcond <- Cond -> Maybe Cond
maybeFlipCond Cond
cond = do
    (y_reg, y_code) <- HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg CmmExpr
    (x_op,  x_code) <- getOperand x
        code = InstrBlock
y_code InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
`appOL` InstrBlock
x_code InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
                  Format -> Operand -> Operand -> Instr
CMP (CmmType -> Format
cmmTypeFormat (Platform -> CmmExpr -> CmmType
cmmExprType Platform
platform CmmExpr
x)) Operand
x_op (Reg -> Operand
OpReg Reg
    return (CondCode False revcond code)

-- anything vs anything
condIntCode' Platform
platform Cond
cond CmmExpr
x CmmExpr
y = do
  (y_reg, y_code) <- HasDebugCallStack => CmmExpr -> NatM (Reg, InstrBlock)
CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg CmmExpr
  (x_op, x_code) <- getRegOrMem x
        code = InstrBlock
y_code InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
x_code InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
                  Format -> Operand -> Operand -> Instr
CMP (CmmType -> Format
cmmTypeFormat (Platform -> CmmExpr -> CmmType
cmmExprType Platform
platform CmmExpr
x)) (Reg -> Operand
OpReg Reg
y_reg) Operand
  return (CondCode False cond code)

-- | Set the condition codes for a floating-point comparison of @x@ with @y@.
--
-- The resulting 'CondCode' is flagged as a float comparison (first field
-- 'True') and carries the unsigned variant of @cond@; see NB(1) below.
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condFltCode cond x y
  =  condFltCode_sse2
  where

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 :: NatM CondCode
  condFltCode_sse2 = do
    platform <- getPlatform
    -- x must survive the evaluation of y, hence the non-clobbered register.
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
                  CMP (floatFormat $ cmmExprWidth platform x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)

-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode ::           CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode ::           CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_VecCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_VecCode ::           CmmReg  -> CmmExpr -> NatM InstrBlock

-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
-- (Type signature is given with the other assign* signatures above.)
--
-- First equation: @[addr] := [addr] `op` i@ where @op@ is an addition or a
-- subtraction and @i@ is an integer literal.  This is emitted as one
-- instruction with an immediate source and the memory location as
-- destination.  For II64 stores the literal must additionally satisfy
-- 'is32BitInteger'.
assignMem_IntCode ty addr (CmmMachOp op [CmmLoad addr2 _ _,
                                                 CmmLit (CmmInt i _)])
   | addr == addr2, ty /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr ty (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        -- Map the MachOp to the instruction that can operate directly on
        -- memory; 'Nothing' makes the guard fail and selects the general case.
        check :: MachOp -> Maybe (Format -> Operand -> Operand -> Instr)
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode ty addr src = do
    platform <- getPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI platform src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV ty op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    -- Produce the source as either an immediate (literals accepted by
    -- 'is32BitLit') or a non-clobbered register.
    get_op_RI :: Platform -> CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    get_op_RI platform (CmmLit lit) | is32BitLit platform lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)

-- Assign; dst is a reg, rhs is mem
-- (Type signature is given with the other assign* signatures above.)
--
-- Source is a memory load: load straight into the destination register with
-- a MOV whose format is taken from the destination register's type.
assignReg_IntCode reg (CmmLoad src _ _) = do
  let ty = cmmTypeFormat $ cmmRegType reg
  load_code <- intLoadCode (MOV ty) src
  platform <- ncgPlatform <$> getConfig
  return (load_code (getRegisterReg platform reg))

-- dst is a reg, but src could be anything
assignReg_IntCode reg src = do
  platform <- ncgPlatform <$> getConfig
  -- getAnyReg yields a code generator parameterised over the target
  -- register; instantiate it with the destination.
  code <- getAnyReg src
  return (code (getRegisterReg platform reg))

-- Floating point assignment to memory
-- (Type signature is given with the other assign* signatures above.)
--
-- Evaluate @src@ into a non-clobbered register, compute the addressing mode
-- for @addr@, then store the register with a MOV of format @ty@.
assignMem_FltCode ty addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  let
        code = src_code `appOL`
               addr_code `snocOL`
               MOV ty (OpReg src_reg) (OpAddr addr)

  return code

-- Floating point assignment to a register/temporary
-- (Type signature is given with the other assign* signatures above.)
--
-- Generate code that computes @src@ directly into the register backing @reg@.
assignReg_FltCode reg src = do
  src_code <- getAnyReg src
  platform <- ncgPlatform <$> getConfig
  return (src_code (getRegisterReg platform reg))

-- Vector assignment to memory
--
-- Evaluate @src@ into a non-clobbered register, compute the addressing mode
-- for @addr@, then store the register using the move instruction that
-- 'movInstr' selects for format @ty@ under the current NCG configuration.
assignMem_VecCode ty addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  config <- getConfig
  let
    code = src_code `appOL`
           addr_code `snocOL`
           (movInstr config ty (OpReg src_reg) (OpAddr addr))
  return code

-- Vector assignment to a register/temporary
--
-- Generate code that computes @src@ directly into the register backing @reg@.
assignReg_VecCode reg src = do
  platform <- ncgPlatform <$> getConfig
  src_code <- getAnyReg src
  return (src_code (getRegisterReg platform reg))

-- | Generate an unconditional jump to the address denoted by an expression.
-- The register list is passed through unchanged to the 'JMP' instruction.
genJump :: CmmExpr{-the branch target-} -> [RegWithFormat] -> NatM InstrBlock

-- Target is loaded from memory: jump through the memory operand.
genJump (CmmLoad mem _ _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

-- Target is a literal: jump to the immediate, no set-up code needed.
genJump (CmmLit lit) regs =
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

-- Anything else: evaluate the target into a register and jump through it.
genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)

-- -----------------------------------------------------------------------------
--  Unconditional branches

-- | Instructions for an unconditional branch to the given block.
-- NOTE(review): the composed function name was lost in extraction; it is
-- restored as 'mkJumpInstr' from its visible type (block id to instruction
-- list) -- confirm against upstream.
genBranch :: BlockId -> InstrBlock
genBranch = toOL . mkJumpInstr

-- -----------------------------------------------------------------------------
--  Conditional jumps/branches

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.

I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}

-- | Generate a conditional branch.  This only determines whether the target
-- is a 32-bit platform and hands everything on to 'genCondBranch''.
genCondBranch
    :: BlockId      -- the source of the jump
    -> BlockId      -- the true branch target
    -> BlockId      -- the false branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock -- Instructions

genCondBranch bid id false expr = do
  is32Bit <- is32BitPlatform
  genCondBranch' is32Bit bid id false expr

-- | We return the instructions generated.
--
-- Arguments: the 32-bit flag (ignored by this equation), the source block,
-- the true target, the false target, and the condition expression.
--
-- Integer conditions become a single conditional jump to the true target
-- followed by an unconditional branch to the false target.  Float conditions
-- additionally consult the parity flag; see Note [SSE Parity Checks].
genCondBranch' :: Bool -> BlockId -> BlockId -> BlockId -> CmmExpr
               -> NatM InstrBlock

genCondBranch' _ bid id false bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  if not is_float
    then
        return (cond_code `snocOL` JXX cond id `appOL` genBranch false)
    else do
        -- See Note [SSE Parity Checks]
        let jmpFalse = genBranch false
            code
                = case cond of
                  NE  -> or_unordered
                  GU  -> plain_test
                  GEU -> plain_test
                  -- Use ASSERT so we don't break releases if
                  -- LTT/LE creep in somehow.
                  LTT ->
                    assertPpr False (text "Should have been turned into >")
                    and_ordered
                  LE  ->
                    assertPpr False (text "Should have been turned into >=")
                    and_ordered
                  _   -> and_ordered

            -- Jump on the condition alone, otherwise fall to the false branch.
            plain_test = unitOL (
                  JXX cond id
                ) `appOL` jmpFalse
            -- Take the true branch if the condition holds OR parity is set.
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ] `appOL` jmpFalse
            -- Parity set goes to the false branch first; only then is the
            -- condition itself tested.
            and_ordered = toOL [
                  JXX PARITY false,
                  JXX cond id,
                  JXX ALWAYS false
                ]
        -- The float sequences add jumps to the false target, so the weight
        -- of the bid -> false edge is increased by 3.
        updateCfgNat (\cfg -> adjustEdgeWeight cfg (+3) bid false)
        return (cond_code `appOL` code)

{-  Note [Introducing cfg edges inside basic blocks]

    During instruction selection a statement `s`
    in a block B with control of the sort: B -> C
    will sometimes result in control
    flow of the sort:

            ┌ < ┐
            v   ^
      B ->  B1  ┴ -> C

    as is the case for some atomic operations.

    Now to keep the CFG in sync when introducing B1 we clearly
    want to insert it between B and C. However there is
    a catch when we have to deal with self loops.

    We might start with code and a CFG of these forms:

    loop:
        stmt1               ┌ < ┐
        ....                v   ^
        stmtX              loop ┘
        stmtY
        ....
        goto loop:

    Now we introduce B1:
                            ┌ ─ ─ ─ ─ ─┐
        loop:               │   ┌ <  ┐ │
        instrs              v   │    │ ^
        ....               loop ┴ B1 ┴ ┘
        goto loop:

    This is simple, all outgoing edges from loop now simply
    start from B1 instead and the code generator knows which
    new edges it introduced for the self loop of B1.

    Disaster strikes if the statement Y follows the same pattern.
    If we apply the same rule that all outgoing edges change then
    we end up with:

        loop ─> B1 ─> B2 ┬─┐
          │      │    └─<┤ │
          │      └───<───┘ │
          └───────<────────┘

    This is problematic. The edge B1->B1 is modified as expected.
    However the modification is wrong!

    The assembly in this case looked like this:

    _loop:
        <instrs>
    _B1:
        ...
        cmpxchgq ...
        jne _B1
        <instrs>
        <end _B1>
    _B2:
        ...
        cmpxchgq ...
        jne _B2
        <instrs>
        jmp loop

    There is no edge _B2 -> _B1 here. It's still a self loop onto _B1.

    The problem here is that really B1 should be two basic blocks.
    Otherwise we have control flow in the *middle* of a basic block.
    A contradiction!

    So to account for this we add yet another basic block marker:

        cmpxchgq ...
        jne _B1
        jmp _B1'
        <end _B1>

    Now when inserting B2 we will only look at the outgoing edges of B1' and
    everything will work out nicely.

    You might also wonder why we don't insert jumps at the end of _B1'. There is
    no way another block ends up jumping to the labels _B1 or _B2 since they are
    essentially invisible to other blocks. View them as control flow labels local
    to the basic block if you'd like.

    Not doing this ultimately caused (part 2 of) #17334.
-}

-- -----------------------------------------------------------------------------
--  Generating C calls

-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
-- @get_arg@, which moves the arguments to the correct registers/stack
-- locations.  Apart from that, the code is easy.
-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.
-- See Note [Keeping track of the current block] for information why we need
-- to take/return a block id.

-- | Generate code for a foreign call.  Primitive targets are dispatched to
-- 'genPrim'; real foreign targets go to 'genCCall', which never reports a
-- new current block (the second component is 'Nothing').
genForeignCall
    :: ForeignTarget -- ^ function to call
    -> [CmmFormal]   -- ^ where to put the result
    -> [CmmActual]   -- ^ arguments (of mixed type)
    -> BlockId       -- ^ The block we are in
    -> NatM (InstrBlock, Maybe BlockId)

genForeignCall target dst args bid = do
  case target of
    PrimTarget prim         -> genPrim bid prim dst args
    ForeignTarget addr conv -> (,Nothing) <$> genCCall bid addr conv dst args

-- | Generate code for a primitive (callish) operation.  The second component
-- of the result is whatever block id the specialised generator reports, or
-- 'Nothing' for operations handled by 'genSimplePrim'.
genPrim
    :: BlockId       -- ^ The block we are in
    -> CallishMachOp -- ^ MachOp
    -> [CmmFormal]   -- ^ where to put the result
    -> [CmmActual]   -- ^ arguments (of mixed type)
    -> NatM (InstrBlock, Maybe BlockId)

-- First we deal with cases which might introduce new blocks in the stream.
genPrim bid (MO_AtomicRMW width amop) [dst] [addr, n]
  = genAtomicRMW bid width amop dst addr n
genPrim bid (MO_Ctz width) [dst] [src]
  = genCtz bid width dst src

-- Then we deal with cases which do not introduce new blocks in the stream.
genPrim bid prim dst args
  = (,Nothing) <$> genSimplePrim bid prim dst args

    :: BlockId       -- ^ the block we are in
    -> CallishMachOp -- ^ MachOp
    -> [CmmFormal]   -- ^ where to put the result
    -> [CmmActual]   -- ^ arguments (of mixed type)
    -> NatM InstrBlock
genSimplePrim :: Label
-> CallishMachOp -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genSimplePrim Label
bid (MO_Memcpy Int
align)    []      [CmmExpr
n]    = Label -> Int -> CmmExpr -> CmmExpr -> CmmExpr -> NatM InstrBlock
genMemCpy  Label
bid Int
align CmmExpr
dst CmmExpr
src CmmExpr
genSimplePrim Label
bid (MO_Memmove Int
align)   []      [CmmExpr
n]    = Label -> Int -> CmmExpr -> CmmExpr -> CmmExpr -> NatM InstrBlock
forall p.
Label -> p -> CmmExpr -> CmmExpr -> CmmExpr -> NatM InstrBlock
genMemMove Label
bid Int
align CmmExpr
dst CmmExpr
src CmmExpr
genSimplePrim Label
bid (MO_Memcmp Int
align)    [LocalReg
res]   [CmmExpr
n]    = Label
-> Int
-> LocalReg
-> CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
forall p.
-> p
-> LocalReg
-> CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genMemCmp  Label
bid Int
align LocalReg
res CmmExpr
dst CmmExpr
src CmmExpr
genSimplePrim Label
bid (MO_Memset Int
align)    []      [CmmExpr
n]      = Label -> Int -> CmmExpr -> CmmExpr -> CmmExpr -> NatM InstrBlock
genMemSet  Label
bid Int
align CmmExpr
dst CmmExpr
c CmmExpr
genSimplePrim Label
_   CallishMachOp
MO_AcquireFence      []      []             = InstrBlock -> NatM InstrBlock
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return InstrBlock
forall a. OrdList a
nilOL -- barriers compile to no code on x86/x86-64;
genSimplePrim Label
_   CallishMachOp
MO_ReleaseFence      []      []             = InstrBlock -> NatM InstrBlock
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return InstrBlock
forall a. OrdList a
nilOL -- we keep it this long in order to prevent earlier optimisations.
genSimplePrim Label
_   CallishMachOp
MO_SeqCstFence       []      []             = InstrBlock -> NatM InstrBlock
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return (InstrBlock -> NatM InstrBlock) -> InstrBlock -> NatM InstrBlock
forall a b. (a -> b) -> a -> b
$ Instr -> InstrBlock
forall a. a -> OrdList a
unitOL Instr
genSimplePrim Label
_   CallishMachOp
MO_Touch             []      [CmmExpr
_]            = InstrBlock -> NatM InstrBlock
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return InstrBlock
forall a. OrdList a
genSimplePrim Label
_   (MO_Prefetch_Data Int
n) []      [CmmExpr
src]          = Int -> CmmExpr -> NatM InstrBlock
genPrefetchData Int
n CmmExpr
genSimplePrim Label
_   (MO_BSwap Width
width)     [LocalReg
dst]   [CmmExpr
src]          = Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genByteSwap Width
width LocalReg
dst CmmExpr
genSimplePrim Label
bid (MO_BRev Width
width)      [LocalReg
dst]   [CmmExpr
src]          = Label -> Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genBitRev Label
bid Width
width LocalReg
dst CmmExpr
genSimplePrim Label
bid (MO_PopCnt Width
width)    [LocalReg
dst]   [CmmExpr
src]          = Label -> Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genPopCnt Label
bid Width
width LocalReg
dst CmmExpr
genSimplePrim Label
bid (MO_Pdep Width
width)      [LocalReg
dst]   [CmmExpr
mask]     = Label -> Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genPdep Label
bid Width
width LocalReg
dst CmmExpr
src CmmExpr
genSimplePrim Label
bid (MO_Pext Width
width)      [LocalReg
dst]   [CmmExpr
mask]     = Label -> Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genPext Label
bid Width
width LocalReg
dst CmmExpr
src CmmExpr
genSimplePrim Label
bid (MO_Clz Width
width)       [LocalReg
dst]   [CmmExpr
src]          = Label -> Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genClz Label
bid Width
width LocalReg
dst CmmExpr
genSimplePrim Label
bid (MO_UF_Conv Width
width)   [LocalReg
dst]   [CmmExpr
src]          = Label -> Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genWordToFloat Label
bid Width
width LocalReg
dst CmmExpr
genSimplePrim Label
_   (MO_AtomicRead Width
w MemoryOrdering
mo)  [LocalReg
dst]  [CmmExpr
addr]         = Width -> MemoryOrdering -> LocalReg -> CmmExpr -> NatM InstrBlock
genAtomicRead Width
w MemoryOrdering
mo LocalReg
dst CmmExpr
genSimplePrim Label
_   (MO_AtomicWrite Width
w MemoryOrdering
mo) []     [CmmExpr
val]     = Width -> MemoryOrdering -> CmmExpr -> CmmExpr -> NatM InstrBlock
genAtomicWrite Width
w MemoryOrdering
mo CmmExpr
addr CmmExpr
genSimplePrim Label
bid (MO_Cmpxchg Width
width)   [LocalReg
dst]   [CmmExpr
new] = Label
-> Width
-> LocalReg
-> CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genCmpXchg Label
bid Width
width LocalReg
dst CmmExpr
addr CmmExpr
old CmmExpr
genSimplePrim Label
_   (MO_Xchg Width
width)      [LocalReg
dst]   [CmmExpr
addr, CmmExpr
value]  = Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genXchg Width
width LocalReg
dst CmmExpr
addr CmmExpr
genSimplePrim Label
_   (MO_AddWordC Width
w)      [LocalReg
c]   [CmmExpr
y]          = Width
-> (Format -> Operand -> Operand -> Instr)
-> (Format -> Maybe (Operand -> Operand -> Instr))
-> Cond
-> LocalReg
-> LocalReg
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genAddSubRetCarry Width
w Format -> Operand -> Operand -> Instr
ADD_CC (Maybe (Operand -> Operand -> Instr)
-> Format -> Maybe (Operand -> Operand -> Instr)
forall a b. a -> b -> a
const Maybe (Operand -> Operand -> Instr)
forall a. Maybe a
Nothing) Cond
CARRY LocalReg
r LocalReg
c CmmExpr
x CmmExpr
genSimplePrim Label
_   (MO_SubWordC Width
w)      [LocalReg
c]   [CmmExpr
y]          = Width
-> (Format -> Operand -> Operand -> Instr)
-> (Format -> Maybe (Operand -> Operand -> Instr))
-> Cond
-> LocalReg
-> LocalReg
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genAddSubRetCarry Width
w Format -> Operand -> Operand -> Instr
SUB_CC (Maybe (Operand -> Operand -> Instr)
-> Format -> Maybe (Operand -> Operand -> Instr)
forall a b. a -> b -> a
const Maybe (Operand -> Operand -> Instr)
forall a. Maybe a
Nothing) Cond
CARRY LocalReg
r LocalReg
c CmmExpr
x CmmExpr
genSimplePrim Label
_   (MO_AddIntC Width
w)       [LocalReg
c]   [CmmExpr
y]          = Width
-> (Format -> Operand -> Operand -> Instr)
-> (Format -> Maybe (Operand -> Operand -> Instr))
-> Cond
-> LocalReg
-> LocalReg
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genAddSubRetCarry Width
w Format -> Operand -> Operand -> Instr
ADD_CC ((Operand -> Operand -> Instr)
-> Maybe (Operand -> Operand -> Instr)
forall a. a -> Maybe a
Just ((Operand -> Operand -> Instr)
 -> Maybe (Operand -> Operand -> Instr))
-> (Format -> Operand -> Operand -> Instr)
-> Format
-> Maybe (Operand -> Operand -> Instr)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Format -> Operand -> Operand -> Instr
ADD_CC) Cond
OFLO  LocalReg
r LocalReg
c CmmExpr
x CmmExpr
genSimplePrim Label
_   (MO_SubIntC Width
w)       [LocalReg
c]   [CmmExpr
y]          = Width
-> (Format -> Operand -> Operand -> Instr)
-> (Format -> Maybe (Operand -> Operand -> Instr))
-> Cond
-> LocalReg
-> LocalReg
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genAddSubRetCarry Width
w Format -> Operand -> Operand -> Instr
SUB_CC (Maybe (Operand -> Operand -> Instr)
-> Format -> Maybe (Operand -> Operand -> Instr)
forall a b. a -> b -> a
const Maybe (Operand -> Operand -> Instr)
forall a. Maybe a
Nothing) Cond
OFLO  LocalReg
r LocalReg
c CmmExpr
x CmmExpr
genSimplePrim Label
_   (MO_Add2 Width
w)          [LocalReg
l]   [CmmExpr
y]          = Width
-> LocalReg -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genAddWithCarry Width
w LocalReg
h LocalReg
l CmmExpr
x CmmExpr
genSimplePrim Label
_   (MO_U_Mul2 Width
w)        [LocalReg
l]   [CmmExpr
y]          = Width
-> LocalReg -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genUnsignedLargeMul Width
w LocalReg
h LocalReg
l CmmExpr
x CmmExpr
genSimplePrim Label
_   (MO_S_Mul2 Width
w)        [LocalReg
l] [CmmExpr
y]          = Width
-> LocalReg
-> LocalReg
-> LocalReg
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genSignedLargeMul Width
w LocalReg
c LocalReg
h LocalReg
l CmmExpr
x CmmExpr
genSimplePrim Label
_   (MO_S_QuotRem Width
w)     [LocalReg
r]   [CmmExpr
y]          = Width
-> Bool
-> LocalReg
-> LocalReg
-> Maybe CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genQuotRem Width
w Bool
True  LocalReg
q LocalReg
r Maybe CmmExpr
forall a. Maybe a
Nothing   CmmExpr
x  CmmExpr
genSimplePrim Label
_   (MO_U_QuotRem Width
w)     [LocalReg
r]   [CmmExpr
y]          = Width
-> Bool
-> LocalReg
-> LocalReg
-> Maybe CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genQuotRem Width
w Bool
False LocalReg
q LocalReg
r Maybe CmmExpr
forall a. Maybe a
Nothing   CmmExpr
x  CmmExpr
genSimplePrim Label
_   (MO_U_QuotRem2 Width
w)    [LocalReg
r]   [CmmExpr
y]      = Width
-> Bool
-> LocalReg
-> LocalReg
-> Maybe CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genQuotRem Width
w Bool
False LocalReg
q LocalReg
r (CmmExpr -> Maybe CmmExpr
forall a. a -> Maybe a
Just CmmExpr
hx) CmmExpr
lx CmmExpr
genSimplePrim Label
_   CallishMachOp
MO_F32_Fabs          [LocalReg
dst]   [CmmExpr
src]          = Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genFloatAbs Width
W32 LocalReg
dst CmmExpr
genSimplePrim Label
_   CallishMachOp
MO_F64_Fabs          [LocalReg
dst]   [CmmExpr
src]          = Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genFloatAbs Width
W64 LocalReg
dst CmmExpr
genSimplePrim Label
_   CallishMachOp
MO_F32_Sqrt          [LocalReg
dst]   [CmmExpr
src]          = Format -> LocalReg -> CmmExpr -> NatM InstrBlock
genFloatSqrt Format
FF32 LocalReg
dst CmmExpr
genSimplePrim Label
_   CallishMachOp
MO_F64_Sqrt          [LocalReg
dst]   [CmmExpr
src]          = Format -> LocalReg -> CmmExpr -> NatM InstrBlock
genFloatSqrt Format
FF64 LocalReg
dst CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Sin           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"sinf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Cos           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"cosf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Tan           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"tanf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Exp           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"expf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_ExpM1         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"expm1f") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Log           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"logf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Log1P         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"log1pf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Asin          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"asinf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Acos          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"acosf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Atan          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"atanf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Sinh          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"sinhf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Cosh          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"coshf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Tanh          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"tanhf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Pwr           [LocalReg
dst]   [CmmExpr
y]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"powf")  [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Asinh         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"asinhf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Acosh         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"acoshf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F32_Atanh         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"atanhf") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Sin           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"sin") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Cos           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"cos") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Tan           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"tan") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Exp           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"exp") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_ExpM1         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"expm1") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Log           [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"log") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Log1P         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"log1p") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Asin          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"asin") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Acos          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"acos") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Atan          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"atan") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Sinh          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"sinh") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Cosh          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"cosh") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Tanh          [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"tanh") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Pwr           [LocalReg
dst]   [CmmExpr
y]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"pow")  [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Asinh         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"asinh") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Acosh         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"acosh") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_F64_Atanh         [LocalReg
dst]   [CmmExpr
src]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid (String -> FastString
fsLit String
"atanh") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_SuspendThread     [LocalReg
tok]   [CmmExpr
i]         = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genRTSCCall Label
bid (String -> FastString
fsLit String
"suspendThread") [LocalReg
tok] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_ResumeThread      [LocalReg
rs]    [CmmExpr
tok]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genRTSCCall Label
bid (String -> FastString
fsLit String
"resumeThread") [LocalReg
rs] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_I64_Quot          [LocalReg
dst]   [CmmExpr
y]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genPrimCCall Label
bid (String -> FastString
fsLit String
"hs_quotInt64") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_I64_Rem           [LocalReg
dst]   [CmmExpr
y]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genPrimCCall Label
bid (String -> FastString
fsLit String
"hs_remInt64") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_W64_Quot          [LocalReg
dst]   [CmmExpr
y]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genPrimCCall Label
bid (String -> FastString
fsLit String
"hs_quotWord64") [LocalReg
dst] [CmmExpr
genSimplePrim Label
bid CallishMachOp
MO_W64_Rem           [LocalReg
dst]   [CmmExpr
y]          = Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genPrimCCall Label
bid (String -> FastString
fsLit String
"hs_remWord64") [LocalReg
dst] [CmmExpr
genSimplePrim Label
_   CallishMachOp
op                   [LocalReg]
dst     [CmmExpr]
args           = do
  platform <- NCGConfig -> Platform
ncgPlatform (NCGConfig -> Platform) -> NatM NCGConfig -> NatM Platform
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> NatM NCGConfig
  pprPanic "genSimplePrim: unhandled primop" (ppr (pprCallishMachOp op, dst, fmap (pdoc platform) args))

{- Note [Evaluate C-call arguments before placing in destination registers]
When producing code for C calls we must take care when placing arguments
in their final registers. Specifically, we must ensure that temporary register
usage due to evaluation of one argument does not clobber a register in which we
already placed a previous argument (e.g. as the code generation logic for
MO_Shl can clobber %rcx due to x86 instruction limitations).

This is precisely what happened in #18527. Consider this C--:

    (result::I64) = call "ccall" doSomething(_s2hp::I64, 2244, _s2hq::I64, _s2hw::I64 | (1 << _s2hz::I64));

Here we are calling the C function `doSomething` with several arguments, the
last of which is a non-trivial expression involving MO_Shl. In this case the
NCG could naively generate the following assembly (where $tmp denotes some
temporary register and $argN denotes the register for argument N, as dictated
by the platform's calling convention):

    mov _s2hp, $arg1   # place first argument
    mov _s2hq, $arg2   # place second argument

    # Compute 1 << _s2hz
    mov _s2hz, %rcx
    shl %cl, $tmp

    # Compute (_s2hw | (1 << _s2hz))
    mov _s2hw, $arg3
    or $tmp, $arg3

    # Perform the call
    call func

This code is outright broken on Windows which assigns $arg1 to %rcx. This means
that the evaluation of the last argument clobbers the first argument.

To avoid this we use a rather awful hack: when producing code for a C call with
at least one non-trivial argument, we first evaluate all of the arguments into
local registers before moving them into their final calling-convention-defined
homes.  This is performed by 'evalArgs'. Here we define "non-trivial" to be an
expression which might contain a MachOp since these are the only cases which
might clobber registers. Furthermore, we use a conservative approximation of
this condition (only looking at the top-level of CmmExprs) to avoid spending
too much effort trying to decide whether we want to take the fast path.

Note that this hack *also* applies to calls to out-of-line PrimTargets (which
are lowered via a C call), which will ultimately end up in 'genCCall' and,
from there, in genCCall32 or genCCall64.
-}

-- | See Note [Evaluate C-call arguments before placing in destination registers]
--
-- If any of the arguments satisfies 'loadIntoRegMightClobberOtherReg', every
-- argument is first assigned to a fresh local register (see @evalArg@ below).
-- The result is then the concatenated code for those assignments, paired with
-- the register expressions that stand in for the original arguments.
-- Otherwise the instruction block is empty ('nilOL').
evalArgs :: BlockId -> [CmmActual] -> NatM (InstrBlock, [CmmActual])
evalArgs :: Label -> [CmmExpr] -> NatM (InstrBlock, [CmmExpr])
evalArgs Label
bid [CmmExpr]
  | (CmmExpr -> Bool) -> [CmmExpr] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any CmmExpr -> Bool
loadIntoRegMightClobberOtherReg [CmmExpr]
actuals = do
      regs_blks <- (CmmExpr -> NatM (InstrBlock, CmmExpr))
-> [CmmExpr] -> NatM [(InstrBlock, CmmExpr)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM CmmExpr -> NatM (InstrBlock, CmmExpr)
evalArg [CmmExpr]
      return (concatOL $ map fst regs_blks, map snd regs_blks)
  | Bool
otherwise = (InstrBlock, [CmmExpr]) -> NatM (InstrBlock, [CmmExpr])
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return (InstrBlock
forall a. OrdList a
nilOL, [CmmExpr]

    -- Evaluate one argument: allocate a fresh local register of the
    -- argument's type, generate the code for assigning the argument to it,
    -- and return that code with an expression referring to the register.
    evalArg :: CmmActual -> NatM (InstrBlock, CmmExpr)
    evalArg :: CmmExpr -> NatM (InstrBlock, CmmExpr)
evalArg CmmExpr
actual = do
        platform <- NatM Platform
        lreg <- newLocalReg $ cmmExprType platform actual
        (instrs, bid1) <- stmtToInstrs bid $ CmmAssign (CmmLocal lreg) actual
        -- The above assignment shouldn't change the current block
        massert (isNothing bid1)
        return (instrs, CmmReg $ CmmLocal lreg)

    -- Build a 'LocalReg' of the given type from a fresh unique ('getUniqueM').
    newLocalReg :: CmmType -> NatM LocalReg
    newLocalReg :: CmmType -> NatM LocalReg
newLocalReg CmmType
ty = Unique -> CmmType -> LocalReg
LocalReg (Unique -> CmmType -> LocalReg)
-> NatM Unique -> NatM (CmmType -> LocalReg)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> NatM Unique
forall (m :: * -> *). MonadGetUnique m => m Unique
getUniqueM NatM (CmmType -> LocalReg) -> NatM CmmType -> NatM LocalReg
forall a b. NatM (a -> b) -> NatM a -> NatM b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> CmmType -> NatM CmmType
forall a. a -> NatM a
forall (f :: * -> *) a. Applicative f => a -> f a
pure CmmType

-- | Might the code to put this expression into a register
-- clobber any other registers?
--
-- This is a shallow test on the top-level constructor only: 'CmmReg',
-- 'CmmRegOff' and 'CmmLit' are matched explicitly, and every other
-- expression falls through to the final catch-all equation. It is used by
-- 'evalArgs' to decide whether C-call arguments must be pre-evaluated.
loadIntoRegMightClobberOtherReg :: CmmExpr -> Bool
loadIntoRegMightClobberOtherReg :: CmmExpr -> Bool
loadIntoRegMightClobberOtherReg (CmmReg CmmReg
_)      = Bool
loadIntoRegMightClobberOtherReg (CmmRegOff CmmReg
_ Int
_) = Bool
loadIntoRegMightClobberOtherReg (CmmLit CmmLit
_)      = Bool
  -- NB: this last 'False' is slightly risky, because the code for loading
  -- a literal into a register is not entirely trivial.
loadIntoRegMightClobberOtherReg CmmExpr
_               = Bool

-- Note [DIV/IDIV for bytes]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- IDIV reminder:
--   Size    Dividend   Divisor   Quotient    Remainder
--   byte    %ax         r/m8      %al          %ah
--   word    %dx:%ax     r/m16     %ax          %dx
--   dword   %edx:%eax   r/m32     %eax         %edx
--   qword   %rdx:%rax   r/m64     %rax         %rdx
-- We special-case byte division because the current codegen doesn't deal
-- well with accessing the %ah register (also, accessing %ah in 64-bit mode
-- is complicated because it cannot be an operand of many instructions). So
-- we just widen the operands to 16 bits and read the results from %al and
-- %dl (the low bytes of %ax and %dx in the table above). This is not
-- optimal, but a few register moves are probably not a huge deal when doing
-- division.

-- | Generate C call to the given function in ghc-prim
--
-- The target label is built with 'mkCmmCodeLabel' in 'primUnitId' and turned
-- into a call reference with 'cmmMakeDynamicReference'. The call itself is
-- emitted by 'genCCall' using 'CCallConv' with empty argument and result
-- hint lists.
  :: BlockId
  -> FastString
  -> [CmmFormal]
  -> [CmmActual]
  -> NatM InstrBlock
genPrimCCall :: Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genPrimCCall Label
bid FastString
lbl_txt [LocalReg]
dsts [CmmExpr]
args = do
  config <- NatM NCGConfig
  -- FIXME: we should use mkForeignLabel instead of mkCmmCodeLabel
  let lbl = UnitId -> FastString -> CLabel
mkCmmCodeLabel UnitId
primUnitId FastString
  addr <- cmmMakeDynamicReference config CallReference lbl
  let conv = CCallConv
-> [ForeignHint]
-> [ForeignHint]
-> CmmReturnInfo
-> ForeignConvention
ForeignConvention CCallConv
CCallConv [] [] CmmReturnInfo
  genCCall bid addr conv dsts args

-- | Generate C call to the given function in libc
--
-- The target label is a foreign label built with 'mkForeignLabel' from
-- @lbl_txt@ and marked 'ForeignLabelInThisPackage'. It is turned into a call
-- reference with 'cmmMakeDynamicReference', and the call is emitted by
-- 'genCCall' using 'CCallConv' with empty argument and result hint lists.
  :: BlockId
  -> FastString
  -> [CmmFormal]
  -> [CmmActual]
  -> NatM InstrBlock
genLibCCall :: Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genLibCCall Label
bid FastString
lbl_txt [LocalReg]
dsts [CmmExpr]
args = do
  config <- NatM NCGConfig
  -- Assume we can call these functions directly, and that they're not in a dynamic library.
  -- TODO: Why is this ok? Under linux this code will be in libm.so
  --       Is it because they're really implemented as a primitive instruction by the assembler??  -- BL 2009/12/31
  let lbl = FastString -> ForeignLabelSource -> FunctionOrData -> CLabel
mkForeignLabel FastString
lbl_txt ForeignLabelSource
ForeignLabelInThisPackage FunctionOrData
  addr <- cmmMakeDynamicReference config CallReference lbl
  let conv = CCallConv
-> [ForeignHint]
-> [ForeignHint]
-> CmmReturnInfo
-> ForeignConvention
ForeignConvention CCallConv
CCallConv [] [] CmmReturnInfo
  genCCall bid addr conv dsts args

-- | Generate C call to the given function in the RTS
--
-- The target label is a foreign label built with 'mkForeignLabel' from
-- @lbl_txt@ and marked 'ForeignLabelInThisPackage'. It is turned into a call
-- reference with 'cmmMakeDynamicReference', and the call is emitted by
-- 'genCCall' using 'CCallConv' with empty argument and result hint lists.
  :: BlockId
  -> FastString
  -> [CmmFormal]
  -> [CmmActual]
  -> NatM InstrBlock
genRTSCCall :: Label -> FastString -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genRTSCCall Label
bid FastString
lbl_txt [LocalReg]
dsts [CmmExpr]
args = do
  config <- NatM NCGConfig
  -- Assume we can call these functions directly, and that they're not in a dynamic library.
  let lbl = FastString -> ForeignLabelSource -> FunctionOrData -> CLabel
mkForeignLabel FastString
lbl_txt ForeignLabelSource
ForeignLabelInThisPackage FunctionOrData
  addr <- cmmMakeDynamicReference config CallReference lbl
  let conv = CCallConv
-> [ForeignHint]
-> [ForeignHint]
-> CmmReturnInfo
-> ForeignConvention
ForeignConvention CCallConv
CCallConv [] [] CmmReturnInfo
  genCCall bid addr conv dsts args

-- | Generate a real C call to the given address with the given convention
--
-- Steps, in order:
--
--   1. Pair each argument with its hint from the convention; the hint list
--      is extended with 'repeat' so that it never runs out before the
--      arguments do.
--   2. Pass every (argument, hint) pair through 'maybePromoteCArgToW32'.
--   3. Run 'evalArgs' on the promoted arguments (see Note [Evaluate C-call
--      arguments before placing in destination registers]).
--   4. Emit the call with 'genCCall32' or 'genCCall64', chosen by
--      'is32BitPlatform', and append it to the code from step 3.
  :: BlockId
  -> CmmExpr
  -> ForeignConvention
  -> [CmmFormal]
  -> [CmmActual]
  -> NatM InstrBlock
genCCall :: Label
-> CmmExpr
-> ForeignConvention
-> [LocalReg]
-> [CmmExpr]
-> NatM InstrBlock
genCCall Label
bid CmmExpr
addr conv :: ForeignConvention
conv@(ForeignConvention CCallConv
_ [ForeignHint]
argHints [ForeignHint]
_ CmmReturnInfo
_) [LocalReg]
dest_regs [CmmExpr]
args = do
  platform <- NatM Platform
  is32Bit <- is32BitPlatform
  let args_hints = [CmmExpr] -> [ForeignHint] -> [(CmmExpr, ForeignHint)]
forall a b. [a] -> [b] -> [(a, b)]
zip [CmmExpr]
args ([ForeignHint]
argHints [ForeignHint] -> [ForeignHint] -> [ForeignHint]
forall a. [a] -> [a] -> [a]
++ ForeignHint -> [ForeignHint]
forall a. a -> [a]
repeat ForeignHint
      prom_args = ((CmmExpr, ForeignHint) -> CmmExpr)
-> [(CmmExpr, ForeignHint)] -> [CmmExpr]
forall a b. (a -> b) -> [a] -> [b]
map (Platform -> (CmmExpr, ForeignHint) -> CmmExpr
maybePromoteCArgToW32 Platform
platform) [(CmmExpr, ForeignHint)]
  (instrs0, args') <- evalArgs bid prom_args
  instrs1 <- if is32Bit
    then genCCall32 addr conv dest_regs args'
    else genCCall64 addr conv dest_regs args'
  return (instrs0 `appOL` instrs1)

-- | Widen a C-call argument that is narrower than the target width.
--
-- The argument's width (@wfrom@) is taken from its 'cmmExprType'. When it is
-- less than @wto@, the argument is wrapped in a conversion 'MachOp':
-- 'MO_SS_Conv' when the hint is 'SignedHint', 'MO_UU_Conv' for any other
-- hint. The remaining case is handled by the @otherwise@ guard.
maybePromoteCArgToW32 :: Platform -> (CmmExpr, ForeignHint) -> CmmExpr
maybePromoteCArgToW32 :: Platform -> (CmmExpr, ForeignHint) -> CmmExpr
maybePromoteCArgToW32 Platform
platform (CmmExpr
arg, ForeignHint
 | Width
wfrom Width -> Width -> Bool
forall a. Ord a => a -> a -> Bool
< Width
wto =
    -- As wto=W32, we only need to handle integer conversions,
    -- never Float -> Double.
    case ForeignHint
hint of
SignedHint -> MachOp -> [CmmExpr] -> CmmExpr
CmmMachOp (Width -> Width -> MachOp
MO_SS_Conv Width
wfrom Width
wto) [CmmExpr
_          -> MachOp -> [CmmExpr] -> CmmExpr
CmmMachOp (Width -> Width -> MachOp
MO_UU_Conv Width
wfrom Width
wto) [CmmExpr
 | Bool
otherwise   = CmmExpr
   ty :: CmmType
ty = Platform -> CmmExpr -> CmmType
cmmExprType Platform
platform CmmExpr
   wfrom :: Width
wfrom = CmmType -> Width
typeWidth CmmType
   wto :: Width
wto = Width

genCCall32 :: CmmExpr           -- ^ address of the function to call
           -> ForeignConvention -- ^ calling convention
           -> [CmmFormal]       -- ^ where to put the result
           -> [CmmActual]       -- ^ arguments (of mixed type)
           -> NatM InstrBlock
genCCall32 :: CmmExpr
-> ForeignConvention -> [LocalReg] -> [CmmExpr] -> NatM InstrBlock
genCCall32 CmmExpr
addr ForeignConvention
_conv [LocalReg]
dest_regs [CmmExpr]
args = do
        config <- NatM NCGConfig
        let platform = NCGConfig -> Platform
ncgPlatform NCGConfig

            -- If the size is smaller than the word, we widen things (see maybePromoteCArgToW32)
            arg_size_bytes :: CmmType -> Int
            arg_size_bytes CmmType
ty = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Width -> Int
widthInBytes (CmmType -> Width
typeWidth CmmType
ty)) (Width -> Int
widthInBytes (Platform -> Width
wordWidth Platform

            roundTo a
a a
x | a
x a -> a -> a
forall a. Integral a => a -> a -> a
`mod` a
a a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0 = a
                        | Bool
otherwise = a
x a -> a -> a
forall a. Num a => a -> a -> a
+ a
a a -> a -> a
forall a. Num a => a -> a -> a
- (a
x a -> a -> a
forall a. Integral a => a -> a -> a
`mod` a

            push_arg :: CmmActual {-current argument-}
                            -> NatM InstrBlock  -- code

            push_arg  CmmExpr
arg -- we don't need the hints on x86
              | CmmType -> Bool
isWord64 CmmType
arg_ty = do
                RegCode64 code r_hi r_lo <- HasDebugCallStack => CmmExpr -> NatM (RegCode64 InstrBlock)
CmmExpr -> NatM (RegCode64 InstrBlock)
iselExpr64 CmmExpr
                delta <- getDeltaNat
                setDeltaNat (delta - 8)
                return (       code `appOL`
                               toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
                                     PUSH II32 (OpReg r_lo), DELTA (delta - 8),
                                     DELTA (delta-8)]

              | CmmType -> Bool
isFloatType CmmType
arg_ty Bool -> Bool -> Bool
|| CmmType -> Bool
isVecType CmmType
arg_ty = do
                (reg, code) <- CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg CmmExpr
                delta <- getDeltaNat
                setDeltaNat (delta-size)
                return (code `appOL`
                                toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
                                      DELTA (delta-size),
                                      let addr = EABase -> EAIndex -> Imm -> AddrMode
AddrBaseIndex (Reg -> EABase
EABaseReg Reg
                                                                (Int -> Imm
ImmInt Int
                                          format = CmmType -> Format
cmmTypeFormat CmmType

                                       movInstr config format (OpReg reg) (OpAddr addr)


              | Bool
otherwise = do
                -- Arguments can be smaller than 32-bit, but we still use @PUSH
                -- II32@ - the usual calling conventions expect integers to be
                -- 4-byte aligned.
                Bool -> NatM ()
forall (m :: * -> *). (HasCallStack, Applicative m) => Bool -> m ()
massert ((CmmType -> Width
typeWidth CmmType
arg_ty) Width -> Width -> Bool
forall a. Ord a => a -> a -> Bool
<= Width
                (operand, code) <- CmmExpr -> NatM (Operand, InstrBlock)
getOperand CmmExpr
                delta <- getDeltaNat
                setDeltaNat (delta-size)
                return (code `snocOL`
                        PUSH II32 operand `snocOL`
                        DELTA (delta-size))

                 arg_ty :: CmmType
arg_ty = Platform -> CmmExpr -> CmmType
cmmExprType Platform
platform CmmExpr
                 size :: Int
size = CmmType -> Int
arg_size_bytes CmmType
arg_ty -- Byte size

            -- Align stack to 16n for calls, assuming a starting stack
            -- alignment of 16n - word_size on procedure entry. Which we
            -- maintain. See Note [Stack Alignment on X86] in rts/StgCRun.c.
            sizes               = (CmmExpr -> Int) -> [CmmExpr] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (CmmType -> Int
arg_size_bytes (CmmType -> Int) -> (CmmExpr -> CmmType) -> CmmExpr -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Platform -> CmmExpr -> CmmType
cmmExprType Platform
platform) ([CmmExpr] -> [CmmExpr]
forall a. [a] -> [a]
reverse [CmmExpr]
            raw_arg_size        = [Int] -> Int
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
sizes Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Platform -> Int
platformWordSizeInBytes Platform
            arg_pad_size        = (Int -> Int -> Int
forall a. Integral a => a -> a -> a
roundTo Int
16 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int
raw_arg_size) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
            tot_arg_size        = Int
raw_arg_size Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
arg_pad_size Int -> Int -> Int
forall a. Num a => a -> a -> a
- Platform -> Int
platformWordSizeInBytes Platform

        delta0 <- getDeltaNat
        setDeltaNat (delta0 - arg_pad_size)

        push_codes <- mapM push_arg (reverse args)
        delta <- getDeltaNat
        massert (delta == delta0 - tot_arg_size)

        -- deal with static vs dynamic call targets
        callinsns <-
          case addr of
            CmmLit (CmmLabel CLabel
               -> InstrBlock -> NatM InstrBlock
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return (InstrBlock -> NatM InstrBlock) -> InstrBlock -> NatM InstrBlock
forall a b. (a -> b) -> a -> b
$ Instr -> InstrBlock
forall a. a -> OrdList a
unitOL (Either Imm Reg -> [RegWithFormat] -> Instr
CALL (Imm -> Either Imm Reg
forall a b. a -> Either a b
Left Imm
fn_imm) [])
               where fn_imm :: Imm
fn_imm = CLabel -> Imm
ImmCLbl CLabel
               -> do { (dyn_r, dyn_c) <- CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg CmmExpr
                     ; massert (isWord32 (cmmExprType platform addr))
                     ; return $ dyn_c `snocOL` CALL (Right dyn_r) [] }
        let push_code
                | Int
arg_pad_size Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
                = [Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL [Format -> Operand -> Operand -> Instr
SUB Format
II32 (Imm -> Operand
OpImm (Int -> Imm
ImmInt Int
arg_pad_size)) (Reg -> Operand
OpReg Reg
                        Int -> Instr
delta0 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
                  InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
`appOL` [InstrBlock] -> InstrBlock
forall a. [OrdList a] -> OrdList a
concatOL [InstrBlock]
                | Bool
                = [InstrBlock] -> InstrBlock
forall a. [OrdList a] -> OrdList a
concatOL [InstrBlock]

            call = InstrBlock
callinsns InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
                   [Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL (
                      (if Int
tot_arg_size Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then [] else
                       [Format -> Operand -> Operand -> Instr
ADD Format
II32 (Imm -> Operand
OpImm (Int -> Imm
ImmInt Int
tot_arg_size)) (Reg -> Operand
OpReg Reg
                      [Instr] -> [Instr] -> [Instr]
forall a. [a] -> [a] -> [a]
                      [Int -> Instr
        setDeltaNat delta0

            -- assign the results, if necessary
            assign_code []     = InstrBlock
forall a. OrdList a
            assign_code [LocalReg
              | CmmType -> Bool
isVecType CmmType
              = Instr -> InstrBlock
forall a. a -> OrdList a
unitOL (HasDebugCallStack => NCGConfig -> Format -> Reg -> Reg -> Instr
NCGConfig -> Format -> Reg -> Reg -> Instr
mkRegRegMoveInstr NCGConfig
config (CmmType -> Format
cmmTypeFormat CmmType
ty) Reg
xmm0 Reg
              | CmmType -> Bool
isFloatType CmmType
ty =
                  -- we assume SSE2
                  let tmp_amode :: AddrMode
tmp_amode = EABase -> EAIndex -> Imm -> AddrMode
AddrBaseIndex (Reg -> EABase
EABaseReg Reg
                                                       (Int -> Imm
ImmInt Int
                      fmt :: Format
fmt = Width -> Format
floatFormat Width
                         in [Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL [ Format -> Operand -> Operand -> Instr
SUB Format
II32 (Imm -> Operand
OpImm (Int -> Imm
ImmInt Int
b)) (Reg -> Operand
OpReg Reg
                                   Int -> Instr
delta0 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
                                   Format -> AddrMode -> Instr
X87Store Format
fmt  AddrMode
                                   -- X87Store only supported for the CDECL ABI
                                   -- NB: This code will need to be
                                   -- revisited once GHC does more work around
                                   -- SIGFPE f
                                   Format -> Operand -> Operand -> Instr
MOV Format
fmt (AddrMode -> Operand
OpAddr AddrMode
tmp_amode) (Reg -> Operand
OpReg Reg
                                   Format -> Operand -> Operand -> Instr
ADD Format
II32 (Imm -> Operand
OpImm (Int -> Imm
ImmInt Int
b)) (Reg -> Operand
OpReg Reg
                                   Int -> Instr
              | CmmType -> Bool
isWord64 CmmType
ty    = [Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL [Format -> Operand -> Operand -> Instr
MOV Format
II32 (Reg -> Operand
OpReg Reg
eax) (Reg -> Operand
OpReg Reg
                                        Format -> Operand -> Operand -> Instr
MOV Format
II32 (Reg -> Operand
OpReg Reg
edx) (Reg -> Operand
OpReg Reg
              | Bool
otherwise      = Instr -> InstrBlock
forall a. a -> OrdList a
unitOL (Format -> Operand -> Operand -> Instr
MOV (Width -> Format
intFormat Width
                                             (Reg -> Operand
OpReg Reg
                                             (Reg -> Operand
OpReg Reg
                    ty :: CmmType
ty = LocalReg -> CmmType
localRegType LocalReg
                    w :: Width
w  = CmmType -> Width
typeWidth CmmType
                    b :: Int
b  = Width -> Int
widthInBytes Width
                    r_dest_hi :: Reg
r_dest_hi = Reg -> Reg
getHiVRegFromLo Reg
                    r_dest :: Reg
r_dest    = LocalReg -> Reg
getLocalRegReg LocalReg
            assign_code [LocalReg]
many = String -> SDoc -> InstrBlock
forall a. HasCallStack => String -> SDoc -> a
pprPanic String
"genForeignCall.assign_code - too many return values:" ([LocalReg] -> SDoc
forall a. Outputable a => a -> SDoc
ppr [LocalReg]

        return (push_code `appOL`
                call `appOL`
                assign_code dest_regs)

-- | Generate the instructions for a C call on a 64-bit x86 target.
--
-- The emitted code is, in order: optional stack re-alignment padding,
-- pushes of the stack-passed arguments (and by-reference data), moves of
-- the register-passed arguments, loads of references to stack data,
-- the Windows shadow space (if any), the assignment of the number of
-- SSE registers in use to @eax@, the call itself followed by the stack
-- deallocation, and finally the move of the result into the destination.
--
-- At most one result register is supported; more than one is a panic.
genCCall64 :: CmmExpr           -- ^ address of function to call
           -> ForeignConvention -- ^ calling convention
           -> [CmmFormal]       -- ^ where to put the result
           -> [CmmActual]       -- ^ arguments (of mixed type)
           -> NatM InstrBlock
genCCall64 addr conv dest_regs args = do
    config <- getConfig
    let platform  = ncgPlatform config
        word_size = platformWordSizeInBytes platform
        wordFmt   = archWordFormat (target32Bit platform)

    -- Compute the code for loading arguments into registers,
    -- returning the leftover arguments that will need to be passed on the stack.
    --
    -- NB: the code for loading references to data into registers is computed
    -- later (in 'pushArgs'), because we don't yet know where the data will be
    -- placed (due to alignment requirements).
    LoadArgs
      { stackArgs       = proper_stack_args
      , stackDataArgs   = stack_data_args
      , usedRegs        = arg_regs_used
      , assignArgsCode  = assign_args_code
      }
      <- loadArgs config args

    let -- Pad all arguments and data passed on stack to align them properly.
        (stk_args_with_padding, args_aligned_16) =
          padStackArgs platform (proper_stack_args, stack_data_args)

        -- Align stack to 16n for calls, assuming a starting stack
        -- alignment of 16n - word_size on procedure entry. Which we
        -- maintain. See Note [Stack Alignment on X86] in rts/StgCRun.c
        need_realign_call = args_aligned_16
    align_call_code <-
      if need_realign_call
      then addStackPadding word_size
      else return nilOL

    -- Compute the code that pushes data to the stack, and also
    -- the code that loads references to that data into registers,
    -- when the data is passed by reference in a register.
    (load_data_refs, push_code) <-
      pushArgs config proper_stack_args stk_args_with_padding

    -- On Windows, leave stack space for the arguments that we are passing
    -- in registers (the so-called shadow space).
    let shadow_space =
          if platformOS platform == OSMinGW32
          then 8 * length (allArgRegs platform)
            -- NB: the shadow store is always 8 * 4 = 32 bytes large,
            -- i.e. the cumulative size of rcx, rdx, r8, r9 (see 'allArgRegs').
          else 0
    shadow_space_code <- addStackPadding shadow_space

    let total_args_size
          = shadow_space
          + sum (map (stackArgSpace platform) stk_args_with_padding)
        -- Everything that must be popped again after the call returns.
        real_size =
          total_args_size + if need_realign_call then word_size else 0

    -- End of argument passing.
    --
    -- Next step: emit the appropriate call instruction.
    delta <- getDeltaNat

    let -- The System V AMD64 ABI requires us to set %al to the number of SSE2
        -- registers that contain arguments, if the called routine
        -- is a varargs function.  We don't know whether it's a
        -- varargs function or not, so we have to assume it is.
        --
        -- It's not safe to omit this assignment, even if the number
        -- of SSE2 regs in use is zero.  If %al is larger than 8
        -- on entry to a varargs function, seg faults ensue.
        nb_sse_regs_used = count (isFloatFormat . regWithFormat_format) arg_regs_used
        assign_eax_sse_regs
          = unitOL (MOV II32 (OpImm (ImmInt nb_sse_regs_used)) (OpReg eax))
          -- Note: we do this on Windows as well. It's not entirely clear why
          -- it's needed (the Windows X86_64 calling convention does not
          -- dictate it), but we get segfaults without it.
          --
          -- One test case exhibiting the issue is T20030_test1j;
          -- if you change this, make sure to run it in a loop for a while
          -- with at least -j8 to check.

        -- Live registers we are annotating the call instruction with
        arg_regs = [RegWithFormat eax wordFmt] ++ arg_regs_used

    -- deal with static vs dynamic call targets
    (callinsns,_cconv) <- case addr of
      CmmLit (CmmLabel lbl) ->
        return (unitOL (CALL (Left (ImmCLbl lbl)) arg_regs), conv)
      _ -> do
        (dyn_r, dyn_c) <- getSomeReg addr
        return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)

    let call = callinsns `appOL`
               toOL (
                    -- Deallocate parameters after call for ccall
                  (if real_size == 0 then [] else
                   [ADD (intFormat (platformWordWidth platform)) (OpImm (ImmInt real_size)) (OpReg esp)])
                  ++
                  [DELTA (delta + real_size)]
               )
    setDeltaNat (delta + real_size)

    let -- assign the results, if necessary
        assign_code []     = nilOL
        assign_code [dest] =
          unitOL $
            mkRegRegMoveInstr config fmt reg r_dest
          where
            -- Integer results are read from rax, everything else from xmm0.
            reg = if isIntFormat fmt then rax else xmm0
            fmt = cmmTypeFormat rep
            rep = localRegType dest
            r_dest = getRegisterReg platform (CmmLocal dest)
        assign_code _many = panic "genForeignCall.assign_code many"

    return (align_call_code     `appOL`
            push_code           `appOL`
            assign_args_code    `appOL`
            load_data_refs      `appOL`
            shadow_space_code   `appOL`
            assign_eax_sse_regs `appOL`
            call                `appOL`
            assign_code dest_regs)

-- -----------------------------------------------------------------------------
-- Loading arguments into registers for 64-bit C calls.

-- | Information needed to know how to pass arguments in a C call,
-- and in particular how to load them into registers.
--
-- Values of this type are combined field-wise (see the 'Semigroup'
-- instance), one value per argument.
data LoadArgs
  = LoadArgs
  -- | Arguments that should be passed on the stack
  { stackArgs     :: [RawStackArg]
  -- | Additional values to store onto the stack.
  , stackDataArgs :: [CmmExpr]
  -- | Which registers are we using for argument passing?
  , usedRegs      :: [RegWithFormat]
  -- | The code to assign arguments to registers used for argument passing.
  , assignArgsCode :: InstrBlock
  }
-- | Field-wise concatenation: stack arguments, stack data, used registers
-- and register-assignment code of the left operand come first.
instance Semigroup LoadArgs where
  LoadArgs a1 d1 r1 j1 <> LoadArgs a2 d2 r2 j2
    = LoadArgs (a1 ++ a2) (d1 ++ d2) (r1 ++ r2) (j1 S.<> j2)
-- | The empty 'LoadArgs': nothing on the stack, no registers used, no code.
instance Monoid LoadArgs where
  mempty = LoadArgs [] [] [] nilOL

-- | An argument passed on the stack, either directly or by reference.
--
-- The padding information hasn't yet been computed (see 'StackArg').
data RawStackArg
  -- | Pass the argument on the stack directly.
  = RawStackArg { stackArgExpr :: CmmExpr }
  -- | Pass the argument by reference.
  | RawStackArgRef
    { stackRef :: StackRef
       -- ^ is the reference passed in a register, or on the stack?
    , stackRefArgSize :: Int
        -- ^ the size of the data pointed to
    }
  deriving ( Show )

-- | An argument passed on the stack, either directly or by reference,
-- with additional padding information.
data StackArg
  -- | Pass the argument on the stack directly.
  = StackArg
      { stackArgExpr :: CmmExpr
      , stackArgPadding :: Int
        -- ^ padding required (in bytes)
      }
  -- | Pass the argument by reference.
  | StackArgRef
     { stackRef :: StackRef
        -- ^ where the reference is passed
     , stackRefArgSize :: Int
        -- ^ the size of the data pointed to
     , stackRefArgPadding :: Int
       -- ^ padding of the data pointed to
       -- (the reference itself never requires padding)
     }
  deriving ( Show )

-- | Where is a reference to data on the stack passed?
data StackRef
  -- | In a register.
  = InReg Reg
  -- | On the stack.
  | OnStack
  deriving ( Eq, Ord, Show )

-- | An amount of stack padding, in bytes.
newtype Padding = Padding { paddingBytes :: Int }
  deriving ( Show, Eq, Ord )

-- | How much space does this 'StackArg' take up on the stack?
--
-- Only counts the "reference" part for references, not the data it points to.
stackArgSpace :: Platform -> StackArg -> Int
stackArgSpace platform = \case
  -- A direct argument occupies its own size plus its padding.
  StackArg arg padding ->
    argSize platform arg + padding
  StackArgRef { stackRef = ref } ->
    case ref of
      -- The reference lives in a register: no stack space for it.
      InReg   {} -> 0
      -- The reference itself is stored on the stack.
      OnStack {} -> 8

-- | Pad arguments, assuming we start aligned to a 16-byte boundary.
--
-- Returns padded arguments, together with whether we end up aligned
-- to a 16-byte boundary.
--
-- The result lists the direct arguments first (with references resolved
-- against the padding of the data they point to), followed by the data
-- section. Arguments needing more than 16 bytes of alignment are rejected
-- with 'sorry'.
padStackArgs :: Platform
             -> ([RawStackArg], [CmmExpr])
             -> ([StackArg], Bool)
padStackArgs platform (args0, data_args0) =
    ( resolve_args args (fmap snd data_args) ++ padded_data
    , align_16_end )

  where
    -- Pad the direct args
    (args, align_16_mid) = pad_args True args0

    -- Pad the data section
    (data_args, align_16_end) = pad_args align_16_mid (map RawStackArg data_args0)

    -- The data section only ever contains direct values.
    padded_data :: [StackArg]
    padded_data =
        [ case data_arg of
            RawStackArg arg -> StackArg arg pad
            RawStackArgRef {} -> panic "padStackArgs: reference in data section"
        | (data_arg, Padding pad) <- data_args
        ]

    -- Now figure out where the data is placed relative to the direct arguments,
    -- in order to resolve references.
    --
    -- The second argument is the list of paddings of the data section;
    -- each by-reference argument consumes the next one.
    resolve_args :: [(RawStackArg, Padding)] -> [Padding] -> [StackArg]
    resolve_args [] _ = []
    resolve_args ((stk_arg, Padding pad):rest) pads =
      let (this_arg, pads') =
            case stk_arg of
              RawStackArg arg -> (StackArg arg pad, pads)
              RawStackArgRef ref size ->
                case pads of
                  Padding arg_pad : rest_pads ->
                    ( StackArgRef
                        { stackRef = ref
                        , stackRefArgSize = size
                        , stackRefArgPadding = arg_pad }
                    , rest_pads )
                  [] ->
                    panic "padStackArgs: by-reference argument without data"
      in this_arg : resolve_args rest pads'

    -- Compute the padding of each argument, threading through whether
    -- the stack is currently aligned to a 16-byte boundary.
    pad_args :: Bool -> [RawStackArg] -> ([(RawStackArg, Padding)], Bool)
    pad_args aligned_16 [] = ([], aligned_16)
    pad_args aligned_16 (arg:more)
      | needed_alignment > 16
      -- We don't know if the stack is aligned to 8 (mod 32) or 24 (mod 32).
      -- This makes aligning the stack to a 32 or 64 byte boundary more
      -- complicated, in particular with DELTA.
      = sorry $ unlines
        [ "X86_64 C call: unsupported argument."
        , "  Alignment requirement: " ++ show needed_alignment ++ " bytes."
        , if platformOS platform == OSMinGW32
          then "  The X86_64 NCG does not (yet) support Windows C calls with 256/512 bit vectors."
          else "  The X86_64 NCG cannot (yet) pass 256/512 bit vectors on the stack for C calls."
        , "  Please use the LLVM backend (-fllvm)." ]
      | otherwise
      = let ( rest, final_align_16 ) = pad_args next_aligned_16 more
        in  ( (arg, Padding padding) : rest, final_align_16 )

      where
        -- A direct argument is aligned to its own size; a reference
        -- is word-sized.
        needed_alignment :: Int
        needed_alignment = case arg of
          RawStackArg e     -> argSize platform e
          RawStackArgRef {} -> platformWordSizeInBytes platform
        -- Only a 16-byte argument arriving on a misaligned stack needs padding.
        padding :: Int
        padding
          | needed_alignment < 16 || aligned_16
          = 0
          | otherwise
          = 8
        next_aligned_16 :: Bool
        next_aligned_16 = not ( aligned_16 && needed_alignment < 16 )

-- | Load arguments into available registers.
--
-- Dispatches on the target OS: the Windows variant threads the list of
-- paired argument registers, the System V variant threads separate lists
-- of integer and floating-point argument registers.
loadArgs :: NCGConfig -> [CmmExpr] -> NatM LoadArgs
loadArgs config args
  | platformOS platform == OSMinGW32
  = evalStateT (loadArgsWin config args) (allArgRegs platform)
  | otherwise
  = evalStateT (loadArgsSysV config args) (allIntArgRegs platform
                                          ,allFPArgRegs  platform)
  where
    platform :: Platform
    platform = ncgPlatform config

-- | Load arguments into available registers (System V AMD64 ABI).
--
-- The state is the pair of still-available integer and floating-point
-- argument registers. Each argument takes the next register of the
-- matching class if there is one, and is passed on the stack otherwise.
loadArgsSysV :: NCGConfig
             -> [CmmExpr]
             -> StateT ([Reg], [Reg]) NatM LoadArgs
loadArgsSysV _ [] = return mempty
loadArgsSysV config (arg:rest) = do
  (iregs, fregs) <- get
  -- No available registers: pass everything on the stack (shortcut).
  if null iregs && null fregs
  then return $
          LoadArgs
            { stackArgs       = map RawStackArg (arg:rest)
            , stackDataArgs   = []
            , assignArgsCode  = nilOL
            , usedRegs        = []
            }
  else do
    -- Pick a register of the right class, if one is still free.
    mbReg <-
      if
        | isIntFormat arg_fmt
        , ireg:iregs' <- iregs
        -> do put (iregs', fregs)
              return $ Just ireg
        | isFloatFormat arg_fmt || isVecFormat arg_fmt
        , freg:fregs' <- fregs
        -> do put (iregs, fregs')
              return $ Just freg
        | otherwise
        -> return Nothing
    this_arg <-
      case mbReg of
        Just reg -> do
          assign_code <- lift $ loadArgIntoReg arg reg
          return $
            LoadArgs
                { stackArgs       = [] -- passed in register
                , stackDataArgs   = []
                , assignArgsCode  = assign_code
                , usedRegs        = [RegWithFormat reg arg_fmt]
                }
        Nothing -> do
          return $
            -- No available register for this argument: pass it on the stack.
            LoadArgs
                { stackArgs       = [RawStackArg arg]
                , stackDataArgs   = []
                , assignArgsCode  = nilOL
                , usedRegs        = []
                }
    others <- loadArgsSysV config rest
    return $ this_arg S.<> others

  where
    platform :: Platform
    platform = ncgPlatform config
    arg_fmt :: Format
    arg_fmt = cmmTypeFormat (cmmExprType platform arg)

-- | Compute all things that will need to be pushed to the stack.
--
-- On Windows, an argument passed by reference will require two pieces of data:
--
--  - the reference (returned in the first position)
--  - the actual data (returned in the second position)
--
-- Arguments larger than 8 bytes are passed by reference; all others
-- are passed directly and contribute nothing to the data list.
computeWinPushArgs :: Platform -> [CmmExpr] -> ([RawStackArg], [CmmExpr])
computeWinPushArgs platform = go
  where
    go :: [CmmExpr] -> ([RawStackArg], [CmmExpr])
    go [] = ([], [])
    go (arg:args) =
      let
        arg_size :: Int
        arg_size = argSize platform arg
        -- The stack entry for this argument, and how it extends the data list.
        (this_arg, add_this_arg)
          | arg_size > 8
          = ( RawStackArgRef OnStack arg_size, (arg :) )
          | otherwise
          = ( RawStackArg arg, id )
        (stk_args, stk_data) = go args
      in
        (this_arg : stk_args, add_this_arg stk_data)

-- | Load arguments into available registers (Windows C X64 calling convention).
loadArgsWin :: NCGConfig -> [CmmExpr] -> StateT [(Reg,Reg)] NatM LoadArgs
-- | Assign C-call arguments to registers for the Windows x64 calling convention.
--
-- The state is the list of argument register pairs that are still available;
-- each pair is @(integer register, floating-point register)@ for one argument
-- position, and one pair is consumed per argument.
--
-- Once the register pairs run out, all remaining arguments are handed to
-- 'computeWinPushArgs' and passed on the stack.
--
-- See Note [The Windows X64 C calling convention].
loadArgsWin :: NCGConfig -> [CmmExpr] -> StateT [(Reg, Reg)] NatM LoadArgs
loadArgsWin _ [] = return mempty
loadArgsWin config (arg:rest) = do
  regs <- get
  case regs of
    reg:regs' -> do
      put regs'
      this_arg  <- lift $ load_arg_win reg
      rest_args <- loadArgsWin config rest
      return $ this_arg S.<> rest_args
    [] -> do
      -- No more registers available: pass all (remaining) arguments on the stack.
      let (stk_args, data_args) = computeWinPushArgs platform (arg:rest)
      return $
        LoadArgs
          { stackArgs       = stk_args
          , stackDataArgs   = data_args
          , assignArgsCode  = nilOL
          , usedRegs        = []
          }
  where
    platform :: Platform
    platform = ncgPlatform config

    arg_fmt :: Format
    arg_fmt = cmmTypeFormat $ cmmExprType platform arg

    -- Place the current argument using the given (integer, float) register pair.
    load_arg_win :: (Reg, Reg) -> NatM LoadArgs
    load_arg_win (ireg, freg)
      | isVecFormat arg_fmt
       -- Vectors are passed by reference.
       -- See Note [The Windows X64 C calling convention].
      = return $
          LoadArgs
            -- Pass the reference in a register,
            -- and the argument data on the stack.
            { stackArgs       = [RawStackArgRef (InReg ireg) (argSize platform arg)]
            , stackDataArgs   = [arg] -- we don't yet know where the data will reside,
            , assignArgsCode  = nilOL -- so we defer computing the reference and storing it
                                      -- in the register until later
            , usedRegs        = [RegWithFormat ireg II64]
            }
      | otherwise
      = do let arg_reg
                  | isFloatFormat arg_fmt
                  = freg
                  | otherwise
                  = ireg
           assign_code <- loadArgIntoReg arg arg_reg
           -- Recall that, for varargs, we must pass floating-point
           -- arguments in both fp and integer registers.
           let (assign_code', regs')
                | isFloatFormat arg_fmt =
                    ( assign_code `snocOL` MOVD FF64 (OpReg freg) (OpReg ireg),
                      [ RegWithFormat freg FF64
                      , RegWithFormat ireg II64 ])
                | otherwise = (assign_code, [RegWithFormat ireg II64])
           return $
             LoadArgs
               { stackArgs       = [] -- passed in register
               , stackDataArgs   = []
               , assignArgsCode  = assign_code'
               , usedRegs        = regs'
               }

-- | Load an argument into a register.
--
-- Assumes that the expression does not contain any MachOps,
-- as per Note [Evaluate C-call arguments before placing in destination registers].
-- In debug builds ('debugIsOn'), this assumption is checked with
-- 'loadIntoRegMightClobberOtherReg' and a violation triggers an assertion
-- failure that prints the offending argument.
--
-- Returns the code that computes the argument directly into the given register.
loadArgIntoReg :: CmmExpr -> Reg -> NatM InstrBlock
loadArgIntoReg arg reg = do
  when (debugIsOn && loadIntoRegMightClobberOtherReg arg) $ do
    platform <- getPlatform
    massertPpr False $
      vcat [ text "loadArgIntoReg: arg might contain MachOp"
           , text "arg:" <+> pdoc platform arg ]
  arg_code <- getAnyReg arg
  return $ arg_code reg

-- -----------------------------------------------------------------------------
-- Pushing arguments onto the stack for 64-bit C calls.

-- | The size of an argument (in bytes).
--
-- Never smaller than the platform word width: the width of the argument's
-- type is rounded up to 'platformWordSizeInBytes' when it is narrower.
argSize :: Platform -> CmmExpr -> Int
argSize platform arg =
  max (platformWordSizeInBytes platform) $
    widthInBytes (typeWidth $ cmmExprType platform arg)

-- | Add the given amount of padding on the stack.
--
-- Emits no code when the padding is zero. Otherwise the stack pointer is
-- lowered by the requested amount and the tracked stack delta is updated
-- to match, both in the monad and with a 'DELTA' pseudo-instruction.
addStackPadding :: Int -- ^ padding (in bytes)
                -> NatM InstrBlock
addStackPadding pad_bytes
  | pad_bytes == 0
  = return nilOL
  | otherwise
  = do delta <- getDeltaNat
       let delta' = delta - pad_bytes
       setDeltaNat delta'
       return $
         toOL [ SUB II64 (OpImm (ImmInt pad_bytes)) (OpReg rsp)
              , DELTA delta'
              ]

-- | Push one argument directly to the stack (by value).
--
-- Assumes the current stack pointer fulfills any necessary alignment requirements.
--
-- The tracked stack delta is decreased by the argument's size ('argSize')
-- in both branches.
pushArgByValue :: NCGConfig -> CmmExpr -> NatM InstrBlock
pushArgByValue config arg
   -- For 64-bit integer arguments, use PUSH II64.
   --
   -- Note: we *must not* do this for smaller arguments.
   -- For example, if we tried to push an argument such as @CmmLoad addr W32 aln@,
   -- we could end up reading unmapped memory and segfaulting.
   | isIntFormat fmt
   , formatInBytes fmt == 8
   = do
     (arg_op, arg_code) <- getOperand arg
     delta <- getDeltaNat
     setDeltaNat (delta-arg_size)
     return $
       arg_code `appOL` toOL
       [ PUSH II64 arg_op
       , DELTA (delta-arg_size) ]

   -- Everything else: evaluate into a register, make room on the stack,
   -- then store the register at the new top of the stack.
   | otherwise
   = do
     (arg_reg, arg_code) <- getSomeReg arg
     delta <- getDeltaNat
     setDeltaNat (delta-arg_size)
     return $ arg_code `appOL` toOL
        [ SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_size)) (OpReg rsp)
        , DELTA (delta-arg_size)
        , movInstr config fmt (OpReg arg_reg) (OpAddr (spRel platform 0)) ]

    where
      platform :: Platform
      platform = ncgPlatform config
      arg_size :: Int
      arg_size = argSize platform arg
      arg_rep :: CmmType
      arg_rep = cmmExprType platform arg
      fmt :: Format
      fmt = cmmTypeFormat arg_rep

-- | Load an argument into a register or push it to the stack.
--
-- The second component of the input is the stack offset of the argument's
-- data; it is only consulted (and must be 'Just') for arguments passed by
-- reference.
--
-- Returns a pair @(load_code, push_code)@:
--
--   * by-value arguments are pushed (followed by their padding), and produce
--     no load code;
--   * by-reference arguments whose reference lives in a register produce only
--     load code, which computes the address of the data into that register;
--   * by-reference arguments whose reference lives on the stack produce only
--     push code, which reserves a slot and stores the address of the data in it.
loadOrPushArg :: NCGConfig -> (StackArg, Maybe Int) -> NatM (InstrBlock, InstrBlock)
loadOrPushArg config (stk_arg, mb_off) =
  case stk_arg of
    StackArg arg pad -> do
      push_code <- pushArgByValue config arg
      pad_code  <- addStackPadding pad
      return (nilOL, push_code `appOL` pad_code)
    StackArgRef { stackRef = ref } ->
      case ref of
        -- Pass the reference in a register
        InReg ireg ->
          return (unitOL $ LEA II64 (OpAddr (spRel platform off)) (OpReg ireg), nilOL)
        -- Pass the reference on the stack
        OnStack {} -> do
          tmp <- getNewRegNat II64
          delta <- getDeltaNat
          setDeltaNat (delta-arg_ref_size)
          let push_code = toOL
                [ SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_ref_size)) (OpReg rsp)
                , DELTA (delta-arg_ref_size)
                , LEA II64 (OpAddr (spRel platform off)) (OpReg tmp)
                , MOV II64 (OpReg tmp) (OpAddr (spRel platform 0)) ]
          return (nilOL, push_code)
  where
    -- Only forced for by-reference arguments, which must come with an offset.
    off :: Int
    off = expectJust "push_arg_win offset" mb_off
    arg_ref_size :: Int
    arg_ref_size = 8 -- passing a reference to the argument
    platform :: Platform
    platform = ncgPlatform config

-- | Push arguments to the stack, right to left.
--
-- On Windows, some arguments may need to be passed by reference,
-- which requires separately passing the data and the reference.
-- See Note [The Windows X64 C calling convention].
--
-- Returns @(load_regs, push_args)@: the code that loads references into
-- registers, and the code that pushes to the stack, as produced by
-- 'loadOrPushArg' for each argument.
pushArgs :: NCGConfig
         -> [RawStackArg]
            -- ^ arguments proper (i.e. don't include the data for arguments passed by reference)
         -> [StackArg]
            -- ^ arguments we are passing on the stack
         -> NatM (InstrBlock, InstrBlock)
pushArgs config proper_args all_stk_args =
  -- Push the stack arguments (right to left),
  -- including both the reference and the data for arguments passed by reference.
  foldMapM (loadOrPushArg config) (reverse $ zip all_stk_args vec_offs)
  where
    platform :: Platform
    platform = ncgPlatform config

    -- For each stack argument: the offset of its data if it is passed by
    -- reference, 'Nothing' otherwise. Only computed on Windows.
    vec_offs :: [Maybe Int]
    vec_offs
      | platformOS platform == OSMinGW32
      = go stack_arg_size all_stk_args
      | otherwise
      = repeat Nothing

    ---------------------
    -- Windows-only code

    -- Size of the arguments we are passing on the stack, counting only
    -- the reference part for arguments passed by reference.
    stack_arg_size :: Int
    stack_arg_size = 8 * count not_in_reg proper_args

    not_in_reg :: RawStackArg -> Bool
    not_in_reg (RawStackArg {}) = True
    not_in_reg (RawStackArgRef { stackRef = ref }) =
      case ref of
        InReg {} -> False
        OnStack {} -> True

    -- Check an offset is valid (8-byte aligned), for assertions.
    ok :: Int -> Bool
    ok off = off `rem` 8 == 0

    -- Tricky code: compute the stack offset to the vector data
    -- for this argument.
    --
    -- If you're confused, Note [The Windows X64 C calling convention]
    -- contains a helpful diagram.
    go :: Int -> [StackArg] -> [Maybe Int]
    go _ [] = []
    go off (stk_arg:args) =
      assertPpr (ok off) (text "unaligned offset:" <+> ppr off) $
      case stk_arg of
        StackArg {} ->
          -- Only account for the stack pointer movement.
          let off' = off - stackArgSpace platform stk_arg
          in Nothing : go off' args
        StackArgRef
          { stackRefArgSize    = data_size
          , stackRefArgPadding = data_pad } ->
          assertPpr (ok data_size) (text "unaligned data size:" <+> ppr data_size) $
          assertPpr (ok data_pad) (text "unaligned data padding:" <+> ppr data_pad) $
          let off' = off
                -- Next piece of data is after the data for this reference
                   + data_size + data_pad
                -- ... and account for the stack pointer movement.
                   - stackArgSpace platform stk_arg
          in Just (data_pad + off) : go off' args

    -- end of Windows-only code
    ----------------------------

{- Note [The Windows X64 C calling convention]
Here are a few facts about the Windows X64 C calling convention that
are important:

  - any argument larger than 8 bytes must be passed by reference,
    and arguments smaller than 8 bytes are padded to 8 bytes.

  - the first four arguments are passed in registers:
      - floating-point scalar arguments are passed in %xmm0, %xmm1, %xmm2, %xmm3
      - other arguments are passed in %rcx, %rdx, %r8, %r9
        (this includes vector arguments, passed by reference)

    For variadic functions, it is additionally expected that floating point
    scalar arguments are copied to the corresponding integer register, e.g.
    the data in xmm2 should also be copied to r8.

    There is no requirement about setting %al like there is for the
    System V AMD64 ABI.

  - subsequent arguments are passed on the stack.

There are also alignment requirements:

  - the data for vectors must be aligned to the size of the vector,
    e.g. a 32 byte vector must be aligned on a 32 byte boundary,

  - the call instruction must be aligned to 16 bytes.
  (This differs from the System V AMD64 ABI, which mandates that the call
  instruction must be aligned to 32 bytes if there are any 32 byte vectors
  passed on the stack.)

This motivates our handling of vector values. Suppose we have a function call
with many arguments, several of them being vectors. We proceed as follows:

 - Add some padding, if necessary, to ensure the stack, when executing the call
    instruction, is 16-byte aligned. Whether this padding is necessary depends
    on what happens next. (Recall also that we start off at 8 (mod 16) alignment,
    as per Note [Stack Alignment on X86] in rts/StgCRun.c)
  - Push all the vectors to the stack first, adding padding after each one
    if necessary.
  - Then push the arguments:
      - for non-vectors, proceed as usual,
      - for vectors, push the address of the vector data we pushed above.
  - Then assign the registers:
      - for non-vectors, proceed as usual,
      - for vectors, store the address in a general-purpose register, as opposed
        to storing the data in an xmm register.

For a concrete example, suppose we have a call of the form:

  f x1 x2 x3 x4 x5 x6 x7

in which:

  - x2, x3, x5 and x7 are 16 byte vectors
  - the other arguments are all 8 byte wide

Now, x1, x2, x3, x4 will get passed in registers, except that we pass
x2 and x3 by reference, because they are vectors. We proceed as follows:

  - push the vectors to the stack: x7, x5, x3, x2 (in that order)
  - push the stack arguments in order: addr(x7), x6, addr(x5)
  - load the remaining arguments into registers: x4, addr(x3), addr(x2), x1

The tricky part is to get the right offsets for the addresses of the vector
data. The following visualisation will hopefully clear things up:

                                  │▓▓│ ─── padding to align the call instruction
                      ╭─╴         ╞══╡     (ensures Sp, below, is 16-byte aligned)
                      │           │  │
                      │  x7  ───╴ │  │
                      │           ├──┤
                      │           │  │
                      │  x5  ───╴ │  │
                      │           ├──┤
     vector data  ────┤           │  │
(individually padded) │  x3  ───╴ │  │
                      │           ├──┤
                      │           │  │
                      │  x2  ───╴ │  │
                      │           ├┄┄┤
                      │           │▓▓│ ─── padding to align x2 to 16 bytes
               ╭─╴    ╰─╴         ╞══╡
               │    addr(x7) ───╴ │  │    ╭─ from here: x7 is +64
               │                  ├──┤ ╾──╯    = 64 (position of x5)
     stack  ───┤         x6  ───╴ │  │         + 16 (size of x5) + 0 (padding of x7)
   arguments   │                  ├──┤         - 2 * 8 (x7 is 2 arguments higher than x5)
               │    addr(x5) ───╴ │  │
               ╰─╴            ╭─╴ ╞══╡ ╾─── from here:
                              │   │  │       - x2 is +32 = 24 (stack_arg_size) + 8 (padding of x2)
                   shadow  ───┤   │  │       - x3 is +48 = 32 (position of x2) + 16 (size of x2) + 0 (padding of x3)
                    space     │   │  │       - x5 is +64 = 48 (position of x3) + 16 (size of x3) + 0 (padding of x5)
                              │   │  │
                              ╰─╴ └──┘ ╾─── Sp

This is all tested in the simd013 test.
-}

-- -----------------------------------------------------------------------------
-- Generating a table-branch

{-
Note [Sub-word subtlety during jump-table indexing]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Offset the index by the start index of the jump table.
It's important that we do this *before* the widening below. To see
why, consider a switch with a sub-word, signed discriminant such as:

    switch [-5...+2] x::I16 {
        case -5: ...
        ...
        case +2: ...
    }

Consider what happens if we offset *after* widening in the case that
x = -4:

                                         // x == -4 == 0xfffc::I16
    indexWidened = UU_Conv(x);           // == 0xfffc::I64
    indexExpr    = indexWidened - (-5);  // == 0x10001::I64

This index is clearly nonsense given that the jump table only has
eight entries.

By contrast, if we widen *after* we offset then we get the correct
index (1),

                                         // x == -4 == 0xfffc::I16
    indexOffset  = x - (-5);             // == 1::I16
    indexExpr    = UU_Conv(indexOffset); // == 1::I64

See #21186.
-}

-- | Generate code for a table-driven switch.
--
-- The scrutinee is first offset by the table's start offset and only then
-- zero-extended to the native word width, as explained in
-- Note [Sub-word subtlety during jump-table indexing].
--
-- Three code shapes are produced:
--
--   * position-independent code: the table address is obtained through a
--     dynamic reference, the indexed table entry is added to it, and we jump
--     to the result;
--   * non-PIC, 32-bit: jump through a table entry addressed with the table
--     label as displacement;
--   * non-PIC, 64-bit: load the table address %rip-relatively, load the
--     indexed entry, and jump to it.
genSwitch :: CmmExpr -> SwitchTargets -> NatM InstrBlock
genSwitch expr targets = do
  config <- getConfig
  let platform = ncgPlatform config
      expr_w = cmmExprWidth platform expr
      indexExpr0 = cmmOffset platform expr offset
      -- We widen to a native-width register because we cannot use arbitrary sizes
      -- in x86 addressing modes.
      -- See Note [Sub-word subtlety during jump-table indexing].
      indexExpr = CmmMachOp
        (MO_UU_Conv expr_w (platformWordWidth platform))
        [indexExpr0]
      wordSize = platformWordSizeInBytes platform
      wordFmt  = intFormat (platformWordWidth platform)
  if ncgPIC config
  then do
        (reg,e_code) <- getNonClobberedReg indexExpr
           -- getNonClobberedReg because it needs to survive across t_code
        lbl <- getNewLabelNat
        let is32bit = target32Bit platform
            os = platformOS platform
            -- Might want to use .rodata.<function we're in> instead, but as
            -- long as it's something unique it'll work out since the
            -- references to the jump table are in the appropriate section.
            rosection = case os of
              -- on Mac OS X/x86_64, put the jump table in the text section to
              -- work around a limitation of the linker.
              -- ld64 is unable to handle the relocations for
              --     .quad L1 - L0
              -- if L0 is not preceded by a non-anonymous label in its section.
              OSDarwin | not is32bit -> Section Text lbl
              _ -> Section ReadOnlyData lbl
        dynRef <- cmmMakeDynamicReference config DataReference lbl
        (tableReg,t_code) <- getSomeReg $ dynRef
        let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
                                       (EAIndex reg wordSize) (ImmInt 0))

        return $ e_code `appOL` t_code `appOL` toOL
          [ ADD wordFmt op (OpReg tableReg)
          , JMP_TBL (OpReg tableReg) ids rosection lbl
          ]
  else do
        (reg,e_code) <- getSomeReg indexExpr
        lbl <- getNewLabelNat
        let is32bit = target32Bit platform
        if is32bit
          then let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wordSize) (ImmCLbl lbl))
                   jmp_code = JMP_TBL op ids (Section ReadOnlyData lbl) lbl
               in return $ e_code `appOL` unitOL jmp_code
          else do
            -- See Note [%rip-relative addressing on x86-64].
            tableReg <- getNewRegNat wordFmt
            targetReg <- getNewRegNat wordFmt
            let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg wordSize) (ImmInt 0))
                fmt = archWordFormat is32bit
                code = e_code `appOL` toOL
                    [ LEA fmt (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl lbl))) (OpReg tableReg)
                    , MOV fmt op (OpReg targetReg)
                    , JMP_TBL (OpReg targetReg) ids (Section ReadOnlyData lbl) lbl
                    ]
            return code
  where
    -- Start offset of the table, and the (possibly missing) target per entry.
    (offset, blockIds) = switchTargetsToTable targets
    ids :: [Maybe JumpDest]
    ids = map (fmap DestBlockId) blockIds

-- | Produce the jump-table data declaration belonging to an instruction.
--
-- Only 'JMP_TBL' instructions carry a table; every other instruction yields
-- 'Nothing'. All present jump destinations are expected to be block ids;
-- anything else is a compiler bug and panics.
generateJumpTableForInstr :: NCGConfig -> Instr -> Maybe (NatCmmDecl (Alignment, RawCmmStatics) Instr)
generateJumpTableForInstr config (JMP_TBL _ ids section lbl)
    = let getBlockId (DestBlockId blockid) = blockid
          getBlockId _ = panic "Non-Label target in Jump Table"
          blockIds = map (fmap getBlockId) ids
      in Just (createJumpTable config blockIds section lbl)
generateJumpTableForInstr _ _ = Nothing

-- | Build the static data for a jump table placed in the given section
-- under the given label.
--
-- When generating position-independent code, each entry is the difference
-- between the target block's label and the table's own label (or a zero word
-- for a missing target). Otherwise each entry is produced by 'jumpTableEntry'.
createJumpTable :: NCGConfig -> [Maybe BlockId] -> Section -> CLabel
                -> GenCmmDecl (Alignment, RawCmmStatics) h g
createJumpTable config ids section lbl
    = let jumpTable
            | ncgPIC config =
                  let ww = ncgWordWidth config
                      jumpTableEntryRel Nothing
                          = CmmStaticLit (CmmInt 0 ww)
                      jumpTableEntryRel (Just blockid)
                          = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww)
                          where blockLabel = blockLbl blockid
                  in map jumpTableEntryRel ids
            | otherwise = map (jumpTableEntry config) ids
      in CmmData section (mkAlignment 1, CmmStaticsRaw lbl jumpTable)

-- | Collect an 'UnwindPoint' for every 'UNWIND' pseudo-instruction in the
-- given instruction list, in order. All other instructions are ignored.
extractUnwindPoints :: [Instr] -> [UnwindPoint]
extractUnwindPoints instrs =
    [ UnwindPoint lbl unwinds | UNWIND lbl unwinds <- instrs ]

-- -----------------------------------------------------------------------------
-- 'condIntReg' and 'condFltReg': condition codes into registers

-- Turn those condition codes into integers now (when they appear on
-- the right hand side of an assignment).
-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.

-- | Turn the result of an integer comparison into a value in a register.
--
-- The comparison code is followed by a SETCC into a fresh 8-bit temporary,
-- which is then zero-extended into the destination register. The result is
-- an 'Any' register of format 'II32'.
condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg cond x y = do
  -- The condition returned by 'condIntCode' is the one to test;
  -- it deliberately shadows the argument.
  CondCode _ cond cond_code <- condIntCode cond x y
  tmp <- getNewRegNat II8
  let
        code dst = cond_code `appOL` toOL [
                    SETCC cond (OpReg tmp),
                    MOVZxL II8 (OpReg tmp) (OpReg dst)
                  ]
  return (Any II32 code)

-- Note [SSE Parity Checks]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
-- We have to worry about unordered operands (eg. comparisons
-- against NaN).  If the operands are unordered, the comparison
-- sets the parity flag, carry flag and zero flag.
-- All comparisons are supposed to return false for unordered
-- operands except for !=, which returns true.
-- Optimisation: we don't have to test the parity flag if we
-- know the test has already excluded the unordered case: eg >
-- and >= test for a zero carry flag, which can only occur for
-- ordered operands.
-- By reversing comparisons we can avoid testing the parity
-- for < and <= as well. If any of the arguments is a NaN we
-- return false either way. If both arguments are valid then
-- x <= y  <->  y >= x  holds. So it's safe to swap these.
-- We invert the condition inside getRegister' and getCondCode
-- which should cover all invertible cases.
-- All other functions translating FP comparisons to assembly
-- use these two to generate the comparison code.
-- As an example consider a simple check:
-- func :: Float -> Float -> Int
-- func x y = if x < y then 1 else 0
-- Which in Cmm gives the floating point comparison.
--  if (%MO_F_Lt_W32(F1, F2)) goto c2gg; else goto c2gf;
-- We used to compile this to an assembly code block like this:
-- _c2gh:
--  ucomiss %xmm2,%xmm1
--  jp _c2gf
--  jb _c2gg
--  jmp _c2gf
-- Where we have to introduce an explicit
-- check for unordered results (using jmp parity):
-- We can avoid this by exchanging the arguments and inverting the direction
-- of the comparison. This results in the sequence of:
--  ucomiss %xmm1,%xmm2
--  ja _c2g2
--  jmp _c2g1
-- Removing the jump reduces the pressure on the branch prediction system
-- and plays better with the uOP cache.

-- | Evaluate a floating-point comparison and materialise its boolean result
-- in a register (format 'II32'), using the comparison code produced by
-- 'condFltCode'.
--
-- The 'Bool' says whether we are on a 32-bit platform; here it only selects
-- the format of the two scratch registers.
--
-- See Note [SSE Parity Checks] for why some conditions also have to look at
-- the parity flag.
condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg is32Bit cond x y = condFltReg_sse2
 where
  condFltReg_sse2 = do
    -- From here on we test the condition handed back by 'condFltCode',
    -- which deliberately shadows the one we were called with.
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp1 <- getNewRegNat (archWordFormat is32Bit)
    tmp2 <- getNewRegNat (archWordFormat is32Bit)
    let -- See Note [SSE Parity Checks]
        code dst =
           cond_code `appOL`
             (case cond of
                NE  -> or_unordered dst
                GU  -> plain_test   dst
                GEU -> plain_test   dst
                -- Use ASSERT so we don't break releases if these creep in.
                LTT -> assertPpr False (text "Should have been turned into >") $
                       and_ordered  dst
                LE  -> assertPpr False (text "Should have been turned into >=") $
                       and_ordered  dst
                _   -> and_ordered  dst)

        -- The condition alone decides the result.
        plain_test dst = toOL [
                    SETCC cond (OpReg tmp1),
                    MOVZxL II8 (OpReg tmp1) (OpReg dst)
                 ]
        -- Result is: condition holds OR parity flag set.
        or_unordered dst = toOL [
                    SETCC cond (OpReg tmp1),
                    SETCC PARITY (OpReg tmp2),
                    OR II8 (OpReg tmp1) (OpReg tmp2),
                    MOVZxL II8 (OpReg tmp2) (OpReg dst)
                  ]
        -- Result is: condition holds AND parity flag clear.
        and_ordered dst = toOL [
                    SETCC cond (OpReg tmp1),
                    SETCC NOTPARITY (OpReg tmp2),
                    AND II8 (OpReg tmp1) (OpReg tmp2),
                    MOVZxL II8 (OpReg tmp2) (OpReg dst)
                  ]
    return (Any II32 code)

-- -----------------------------------------------------------------------------
-- 'trivial*Code': deal with trivial instructions

-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
-- Only look for constants on the right hand side, because that's
-- where the generic optimizer will have put them.

-- Similarly, for unary instructions, we don't have to worry about
-- matching an StInt as the argument, because genericOpt will already
-- have handled the constant-folding.

{-
The Rules of the Game are:

* You cannot assume anything about the destination register dst;
  it may be anything, including a fixed reg.

* You may compute an operand into a fixed reg, but you may not
  subsequently change the contents of that fixed reg.  If you
  want to do so, first copy the value either to a temporary
  or into dst.  You are free to modify dst even if it happens
  to be a fixed reg -- that's not your problem.

* You cannot assume that a fixed reg will stay live over an
  arbitrary computation.  The same applies to the dst reg.

* Temporary regs obtained from getNewRegNat are distinct from
  each other and from all other regs, and stay live over
  arbitrary computations.


SDM's version of The Rules:

* If getRegister returns Any, that means it can generate correct
  code which places the result in any register, period.  Even if that
  register happens to be read during the computation.

  Corollary #1: this means that if you are generating code for an
  operation with two arbitrary operands, you cannot assign the result
  of the first operand into the destination register before computing
  the second operand.  The second operand might require the old value
  of the destination register.

  Corollary #2: A function might be able to generate more efficient
  code if it knows the destination register is a new temporary (and
  therefore not read by any of the sub-computations).

* If getRegister returns Any, then the code it generates may modify only:
        (a) fresh temporaries
        (b) the destination register
        (c) known registers (eg. %ecx is used by shifts)
  In particular, it may *not* modify global registers, unless the global
  register happens to be the destination register.
-}

-- | Generate code for a dyadic integer operation of the given width.
--
-- The second argument is the instruction to use; the third is an optional
-- instruction taking the operands in reverse order, used by 'trivialCode''
-- when the left operand is a suitable literal.
-- This wrapper only fetches the 'Platform' and defers to 'trivialCode''.
trivialCode :: Width -> (Operand -> Operand -> Instr)
            -> Maybe (Operand -> Operand -> Instr)
            -> CmmExpr -> CmmExpr -> NatM Register
trivialCode width instr m a b
    = do platform <- getPlatform
         trivialCode' platform width instr m a b

-- | Worker for 'trivialCode'.
--
-- If a reversed instruction is supplied and the left operand is a literal
-- accepted by 'is32BitLit', the right operand is computed straight into the
-- destination and the reversed instruction is applied with the literal as
-- an immediate.  Every other case goes through 'genTrivialCode'.
trivialCode' :: Platform -> Width -> (Operand -> Operand -> Instr)
             -> Maybe (Operand -> Operand -> Instr)
             -> CmmExpr -> CmmExpr -> NatM Register
trivialCode' platform width _ (Just revinstr) (CmmLit lit_a) b
  | is32BitLit platform lit_a = do
      b_code <- getAnyReg b
      let code dst
            = b_code dst `snocOL`
              revinstr (OpImm (litToImm lit_a)) (OpReg dst)
      return (Any (intFormat width) code)

trivialCode' _ width instr _ a b
  = genTrivialCode (intFormat width) instr a b

-- | Generate code for a two-operand instruction (@dst := dst `op` src@):
-- @a@ is computed into the destination register and @b@ is used as the
-- source operand.
--
-- This is re-used for floating pt instructions too.
genTrivialCode :: Format -> (Operand -> Operand -> Instr)
               -> CmmExpr -> CmmExpr -> NatM Register
genTrivialCode rep instr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep
  let
     -- We want the value of 'b' to stay alive across the computation of 'a'.
     -- But, we want to calculate 'a' straight into the destination register,
     -- because the instruction only has two operands (dst := dst `op` src).
     -- The troublesome case is when the result of 'b' is in the same register
     -- as the destination 'reg'.  In this case, we have to save 'b' in a
     -- new temporary across the computation of 'a'.
     code dst
        | dst `regClashesWithOp` b_op =
                b_code `appOL`
                unitOL (MOV rep b_op (OpReg tmp)) `appOL`
                a_code dst `snocOL`
                instr (OpReg tmp) (OpReg dst)
        | otherwise =
                b_code `appOL`
                a_code dst `snocOL`
                instr b_op (OpReg dst)
  return (Any rep code)

-- | Does the operand mention the given register?  True for a register
-- operand equal to it and for an address whose 'addrModeRegs' contain it;
-- False for every other operand.
regClashesWithOp :: Reg -> Operand -> Bool
reg `regClashesWithOp` OpReg reg2   = reg == reg2
reg `regClashesWithOp` OpAddr amode = reg `elem` addrModeRegs amode
_   `regClashesWithOp` _            = False

-- | Generate code for a fused multiply-add operation, of the form @± x * y ± z@,
-- with 3 operands (FMA3 instruction set).
--
-- The first argument is the vector length; a length of 1 selects the scalar
-- float format, anything else the corresponding vector format.
genFMA3Code :: Length
            -> Width
            -> FMASign
            -> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register
genFMA3Code l w signs x y z = do
  config <- getConfig
  -- For the FMA instruction, we want to compute x * y + z
  --
  -- There are three possible instructions we could emit:
  --
  --   - fmadd213 z y x, result in x, z can be a memory address
  --   - fmadd132 x z y, result in y, x can be a memory address
  --   - fmadd231 y x z, result in z, y can be a memory address
  --
  -- This suggests two possible optimisations:
  --
  --   - OPTIMISATION 1
  --     If one argument is an address, use the instruction that allows
  --     a memory address in that position.
  --
  --   - OPTIMISATION 2
  --     If one argument is in a fixed register, use the instruction that puts
  --     the result in that same register.
  --
  -- Currently we follow neither of these optimisations,
  -- opting to always use fmadd213 for simplicity.
  --
  -- We would like to compute the result directly into the requested register.
  -- To do so we must first compute `x` into the destination register. This is
  -- only possible if the other arguments don't use the destination register.
  -- We check for this and if there is a conflict we move the result only after
  -- the computation. See #24496 how this went wrong in the past.
  let rep
        | l == 1
        = floatFormat w
        | otherwise
        = vecFormat (cmmVec l $ cmmFloat w)
  (y_reg, y_code) <- getNonClobberedReg y
  (z_op, z_code) <- getNonClobberedOperand z
  x_code <- getAnyReg x
  x_tmp <- getNewRegNat rep
  let
     fma213 = FMA3 rep signs FMA213

     code, code_direct, code_mov :: Reg -> InstrBlock
     -- Ideal: Compute the result directly into dst
     code_direct dst = x_code dst `snocOL`
                       fma213 z_op y_reg dst
     -- Fallback: Compute the result into a tmp reg and then move it.
     code_mov dst    = x_code x_tmp `snocOL`
                       fma213 z_op y_reg x_tmp `snocOL`
                       mkRegRegMoveInstr config rep x_tmp dst

     code dst =
        y_code `appOL`
        z_code `appOL`
        ( if arg_regs_conflict then code_mov dst else code_direct dst )

      where

        -- Would writing x into dst overwrite one of the other arguments?
        arg_regs_conflict =
          y_reg == dst ||
          case z_op of
            OpReg z_reg -> z_reg == dst
            OpAddr amode -> dst `elem` addrModeRegs amode
            OpImm {} -> False

  -- NB: Computing the result into a desired register using Any can be tricky.
  -- So for now, we keep it simple. (See #24496).
  return (Any rep code)


-- | Generate code for a unary instruction: the argument is computed into
-- the destination register and the instruction is then applied to that
-- register in place.
trivialUCode :: Format -> (Operand -> Instr)
             -> CmmExpr -> NatM Register
trivialUCode rep instr x = do
  x_code <- getAnyReg x
  let
     code dst =
        x_code dst `snocOL`
        instr (OpReg dst)
  return (Any rep code)


-- | Dyadic floating-point operation: instantiate the instruction at the
-- float format for the given width and hand over to 'genTrivialCode'.
trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialFCode_sse2 ty instr x y
    = genTrivialCode format (instr format) x y
    where format = floatFormat ty

-- | Convert an integer of width @from@ to a floating-point value of width
-- @to@.  Panics if @to@ is neither 'W32' nor 'W64'.
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP from to x =  coerce_sse2
 where

   coerce_sse2 = do
     (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
     let
           -- The conversion instruction is chosen by the target width.
           opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
                             n -> panic $ "coerceInt2FP.sse: unhandled width ("
                                         ++ show n ++ ")"
           code dst = x_code `snocOL` opc (intFormat from) x_op dst
     return (Any (floatFormat to) code)
        -- works even if the destination rep is <II32

-- | Convert a floating-point value of width @from@ to an integer of width
-- @to@.  Panics if @from@ is neither 'W32' nor 'W64'.
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int from to x =  coerceFP2Int_sse2
 where
   coerceFP2Int_sse2 = do
     (x_op, x_code) <- getOperand x  -- ToDo: could be a safe operand
     let
           -- The conversion instruction is chosen by the source width.
           opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
                               n -> panic $ "coerceFP2Int.sse: unhandled width ("
                                           ++ show n ++ ")"
           code dst = x_code `snocOL` opc (intFormat to) x_op dst
     return (Any (intFormat to) code)
         -- works even if the destination rep is <II32

-- | Convert a floating-point value to the floating-point width @to@.
-- Panics if @to@ is neither 'W32' nor 'W64'.
coerceFP2FP :: Width -> CmmExpr -> NatM Register
coerceFP2FP to x = do
  (x_reg, x_code) <- getSomeReg x
  let
        -- The conversion instruction is chosen by the target width.
        opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
                          n -> panic $ "coerceFP2FP: unhandled width ("
                                      ++ show n ++ ")"
        code dst = x_code `snocOL` opc x_reg dst
  return (Any ( floatFormat to) code)


-- | Negate a floating-point value of the given width by XOR-ing it with a
-- constant, loaded from memory, that has only the top bit set.
-- Panics if the float format for the width is not 'FF32' or 'FF64'.
sse2NegCode :: Width -> CmmExpr -> NatM Register
sse2NegCode w x = do
  let fmt = floatFormat w
  x_code <- getAnyReg x
  -- This is how gcc does it, so it can't be that bad:
  let
    const = case fmt of
      FF32 -> CmmInt 0x80000000 W32
      FF64 -> CmmInt 0x8000000000000000 W64
      -- The remaining formats are listed one by one rather than caught
      -- with a wildcard.
      x@II8  -> wrongFmt x
      x@II16 -> wrongFmt x
      x@II32 -> wrongFmt x
      x@II64 -> wrongFmt x
      x@(VecFormat {}) -> wrongFmt x

      where
        wrongFmt x = panic $ "sse2NegCode: " ++ show x
  Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
  tmp <- getNewRegNat fmt
  let
    code dst = x_code dst `appOL` amode_code `appOL` toOL [
        MOV fmt (OpAddr amode) (OpReg tmp),
        XOR fmt (OpReg tmp) (OpReg dst)
        ]
  return (Any fmt code)

-- | Abort with a message saying that the given vector operation is not
-- supported by the native code generator and that @-fllvm@ should be used.
needLlvm :: MachOp -> NatM a
needLlvm mop =
  sorry $ unlines [ "Unsupported vector instruction for the native code generator:"
                  , show mop
                  , "Please use -fllvm." ]

-- | Abort because an operation was given the wrong number of operands.
incorrectOperands :: NatM a
incorrectOperands = sorry "Incorrect number of operands"

-- | Abort because a conversion between the two given widths is not valid.
invalidConversion :: Width -> Width -> NatM a
invalidConversion from to =
  sorry $ "Invalid conversion operation from " ++ show from ++ " to " ++ show to

-- | This works on the invariant that all jumps in the given blocks are required.
--   Starting from there we try to make a few more jumps redundant by reordering
--   them.
--   We depend on the information in the CFG to do so, so without a given CFG
--   we do nothing.
invertCondBranches :: Maybe CFG  -- ^ CFG if present
                   -> LabelMap a -- ^ Blocks with info tables
                   -> [NatBasicBlock Instr] -- ^ List of basic blocks
                   -> [NatBasicBlock Instr]
invertCondBranches Nothing _       bs = bs
invertCondBranches (Just cfg) keep bs =
    invert bs
  where
    invert :: [NatBasicBlock Instr] -> [NatBasicBlock Instr]
    invert (BasicBlock lbl1 ins:b2@(BasicBlock lbl2 _):bs)
      | --pprTrace "Block" (ppr lbl1) True,
        -- The block ends in a conditional jump to the block that follows
        -- it, followed by an unconditional jump somewhere else.
        Just (jmp1,jmp2) <- last2 ins
      , JXX cond1 target1 <- jmp1
      , target1 == lbl2
      --, pprTrace "CutChance" (ppr b1) True
      , JXX ALWAYS target2 <- jmp2
      -- We have enough information to check if we can perform the inversion
      -- TODO: We could also check for the last asm instruction which sets
      -- status flags instead. Which I suspect is worse in terms of compiler
      -- performance, but might be applicable to more cases
      , Just edgeInfo1 <- getEdgeInfo lbl1 target1 cfg
      , Just edgeInfo2 <- getEdgeInfo lbl1 target2 cfg
      -- Both jumps come from the same cmm statement
      , transitionSource edgeInfo1 == transitionSource edgeInfo2
      , CmmSource {trans_cmmNode = cmmCondBranch} <- transitionSource edgeInfo1

      --Int comparisons are invertible
      , CmmCondBranch (CmmMachOp op _args) _ _ _ <- cmmCondBranch
      , Just _ <- maybeIntComparison op
      , Just invCond <- maybeInvertCond cond1

      --Swap the last two jumps, invert the conditional jump's condition.
      = let jumps =
              case () of
                -- We are free to eliminate the jmp. So we do so.
                _ | not (mapMember target1 keep)
                    -> [JXX invCond target2]
                -- If the conditional target is unlikely we put the other
                -- target at the front.
                  | edgeWeight edgeInfo2 > edgeWeight edgeInfo1
                    -> [JXX invCond target2, JXX ALWAYS target1]
                -- Keep things as-is otherwise
                  | otherwise
                    -> [jmp1, jmp2]
        in --pprTrace "Cutable" (ppr [jmp1,jmp2] <+> text "=>" <+> ppr jumps) $
           (BasicBlock lbl1
            (dropTail 2 ins ++ jumps))
            : invert (b2:bs)
    invert (b:bs) = b : invert bs
    invert [] = []

  :: BlockId
  -> Width
  -> AtomicMachOp
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM (InstrBlock, Maybe BlockId)
genAtomicRMW :: Label
-> Width
-> AtomicMachOp
-> LocalReg
-> CmmExpr
-> CmmExpr
-> NatM (InstrBlock, Maybe Label)
genAtomicRMW Label
bid Width
width AtomicMachOp
amop LocalReg
dst CmmExpr
addr CmmExpr
n = do
    Amode amode addr_code <-
        if AtomicMachOp
amop AtomicMachOp -> [AtomicMachOp] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [AtomicMachOp
AMO_Add, AtomicMachOp
        then CmmExpr -> NatM Amode
getAmode CmmExpr
        else CmmExpr -> NatM Amode
getSimpleAmode CmmExpr
addr  -- See genForeignCall for MO_Cmpxchg
    arg <- getNewRegNat format
    arg_code <- getAnyReg n
    platform <- ncgPlatform <$> getConfig

    let dst_r    = Platform -> CmmReg -> Reg
getRegisterReg Platform
platform  (LocalReg -> CmmReg
CmmLocal LocalReg
    (code, lbl) <- op_code dst_r arg amode
    return (addr_code `appOL` arg_code arg `appOL` code, Just lbl)
    -- Code for the operation
    op_code :: Reg       -- Destination reg
            -> Reg       -- Register containing argument
            -> AddrMode  -- Address of location to mutate
            -> NatM (OrdList Instr,BlockId) -- TODO: Return Maybe BlockId
    op_code :: Reg -> Reg -> AddrMode -> NatM (InstrBlock, Label)
op_code Reg
dst_r Reg
arg AddrMode
amode = do
        case AtomicMachOp
amop of
          -- In the common case where dst_r is a virtual register the
          -- final move should go away, because it's the last use of arg
          -- and the first use of dst_r.
AMO_Add  -> (InstrBlock, Label) -> NatM (InstrBlock, Label)
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return ((InstrBlock, Label) -> NatM (InstrBlock, Label))
-> (InstrBlock, Label) -> NatM (InstrBlock, Label)
forall a b. (a -> b) -> a -> b
$ ([Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL [ Instr -> Instr
LOCK (Format -> Operand -> Operand -> Instr
XADD Format
format (Reg -> Operand
OpReg Reg
arg) (AddrMode -> Operand
OpAddr AddrMode
                                     , Format -> Operand -> Operand -> Instr
MOV Format
format (Reg -> Operand
OpReg Reg
arg) (Reg -> Operand
OpReg Reg
                                     ], Label
AMO_Sub  -> (InstrBlock, Label) -> NatM (InstrBlock, Label)
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return ((InstrBlock, Label) -> NatM (InstrBlock, Label))
-> (InstrBlock, Label) -> NatM (InstrBlock, Label)
forall a b. (a -> b) -> a -> b
$ ([Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL [ Format -> Operand -> Instr
NEGI Format
format (Reg -> Operand
OpReg Reg
                                     , Instr -> Instr
LOCK (Format -> Operand -> Operand -> Instr
XADD Format
format (Reg -> Operand
OpReg Reg
arg) (AddrMode -> Operand
OpAddr AddrMode
                                     , Format -> Operand -> Operand -> Instr
MOV Format
format (Reg -> Operand
OpReg Reg
arg) (Reg -> Operand
OpReg Reg
                                     ], Label
          -- In these cases we need a new block id, and have to return it so
          -- that later instruction selection can reference it.
AMO_And  -> (Operand -> Operand -> InstrBlock) -> NatM (InstrBlock, Label)
cmpxchg_code (\ Operand
src Operand
dst -> Instr -> InstrBlock
forall a. a -> OrdList a
unitOL (Instr -> InstrBlock) -> Instr -> InstrBlock
forall a b. (a -> b) -> a -> b
$ Format -> Operand -> Operand -> Instr
AND Format
format Operand
src Operand
AMO_Nand -> (Operand -> Operand -> InstrBlock) -> NatM (InstrBlock, Label)
cmpxchg_code (\ Operand
src Operand
dst -> [Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL [ Format -> Operand -> Operand -> Instr
AND Format
format Operand
src Operand
                                                      , Format -> Operand -> Instr
NOT Format
format Operand
AMO_Or   -> (Operand -> Operand -> InstrBlock) -> NatM (InstrBlock, Label)
cmpxchg_code (\ Operand
src Operand
dst -> Instr -> InstrBlock
forall a. a -> OrdList a
unitOL (Instr -> InstrBlock) -> Instr -> InstrBlock
forall a b. (a -> b) -> a -> b
$ Format -> Operand -> Operand -> Instr
OR Format
format Operand
src Operand
AMO_Xor  -> (Operand -> Operand -> InstrBlock) -> NatM (InstrBlock, Label)
cmpxchg_code (\ Operand
src Operand
dst -> Instr -> InstrBlock
forall a. a -> OrdList a
unitOL (Instr -> InstrBlock) -> Instr -> InstrBlock
forall a b. (a -> b) -> a -> b
$ Format -> Operand -> Operand -> Instr
XOR Format
format Operand
src Operand
        -- Simulate operation that lacks a dedicated instruction using
        -- cmpxchg.
        cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
                     -> NatM (OrdList Instr, BlockId)
        cmpxchg_code :: (Operand -> Operand -> InstrBlock) -> NatM (InstrBlock, Label)
cmpxchg_code Operand -> Operand -> InstrBlock
instrs = do
            lbl1 <- NatM Label
            lbl2 <- getBlockIdNat
            tmp <- getNewRegNat format

            --Record inserted blocks
            --  We turn A -> B into A -> A' -> A'' -> B
            --  with a self loop on A'.
            addImmediateSuccessorNat bid lbl1
            addImmediateSuccessorNat lbl1 lbl2
            updateCfgNat (addWeightEdge lbl1 lbl1 0)

            return $ (toOL
                [ MOV format (OpAddr amode) (OpReg eax)
                , JXX ALWAYS lbl1
                , NEWBLOCK lbl1
                  -- Keep old value so we can return it:
                , MOV format (OpReg eax) (OpReg dst_r)
                , MOV format (OpReg eax) (OpReg tmp)
                `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
                [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
                , JXX NE lbl1
                -- See Note [Introducing cfg edges inside basic blocks]
                -- why this basic block is required.
                , JXX ALWAYS lbl2
                , NEWBLOCK lbl2
    format :: Format
format = Width -> Format
intFormat Width

-- | Count trailing zeroes.
--
-- A 64-bit operand on a 32-bit platform is handled by 'genCtz64_32', which
-- may report a new block id; every other case goes through 'genCtzGeneric'
-- and reports none.
genCtz :: BlockId -> Width -> LocalReg -> CmmExpr -> NatM (InstrBlock, Maybe BlockId)
genCtz bid width dst src = do
  is32Bit <- is32BitPlatform
  if is32Bit && width == W64
    then genCtz64_32 bid dst src
    else (,Nothing) <$> genCtzGeneric width dst src

-- | Count trailing zeroes
--
-- 64-bit width on 32-bit architecture
--
-- The source is evaluated into a high/low register pair and the result is
-- computed with two 32-bit @BSF@s. Two fresh blocks are created; the second
-- one (@lbl2@) is returned alongside the instructions.
genCtz64_32
  :: BlockId
  -> LocalReg
  -> CmmExpr
  -> NatM (InstrBlock, Maybe BlockId)
genCtz64_32 bid dst src = do
  RegCode64 vcode rhi rlo <- iselExpr64 src
  let dst_r = getLocalRegReg dst
  lbl1 <- getBlockIdNat
  lbl2 <- getBlockIdNat
  tmp_r <- getNewRegNat II64

  -- New CFG Edges:
  --  bid -> lbl2
  --  bid -> lbl1 -> lbl2
  --  We also change edges originating at bid to start at lbl2 instead.
  weights <- getCfgWeights
  updateCfgNat (addWeightEdge bid lbl1 110 .
                addWeightEdge lbl1 lbl2 110 .
                addImmediateSuccessor weights bid lbl2)

  -- The following instruction sequence corresponds to the pseudo-code
  --
  --  if (src) {
  --    dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
  --  } else {
  --    dst = 64;
  --  }
  let instrs = vcode `appOL` toOL
           ([ MOV      II32 (OpReg rhi)         (OpReg tmp_r)
            , OR       II32 (OpReg rlo)         (OpReg tmp_r)
            , MOV      II32 (OpImm (ImmInt 64)) (OpReg dst_r)
            , JXX EQQ    lbl2
            , JXX ALWAYS lbl1

            , NEWBLOCK   lbl1
            , BSF     II32 (OpReg rhi)         dst_r
            , ADD     II32 (OpImm (ImmInt 32)) (OpReg dst_r)
            , BSF     II32 (OpReg rlo)         tmp_r
            , CMOV NE II32 (OpReg tmp_r)       dst_r
            , JXX ALWAYS lbl2

            , NEWBLOCK   lbl2
            ])
  return (instrs, Just lbl2)

-- | Count trailing zeroes
--
-- Generic case (width <= word size)
--
-- When the configured BMI version is at least 'BMI2' the @TZCNT@ instruction
-- is used; otherwise a @BSF@ / @CMOV@ sequence is emitted that preloads the
-- bit width as the result for a zero input.
genCtzGeneric :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genCtzGeneric width dst src = do
  code_src <- getAnyReg src
  config <- getConfig
  let bw = widthInBits width
  let dst_r = getLocalRegReg dst
  if ncgBmiVersion config >= Just BMI2
  then do
      src_r <- getNewRegNat (intFormat width)
      let instrs = appOL (code_src src_r) $ case width of
              W8 -> toOL
                  -- Set every bit above the low byte so that a zero byte
                  -- yields 8 from the 32-bit TZCNT.
                  [ OR    II32 (OpImm (ImmInteger 0xFFFFFF00)) (OpReg src_r)
                  , TZCNT II32 (OpReg src_r) dst_r
                  ]
              W16 -> toOL
                  [ TZCNT  II16 (OpReg src_r) dst_r
                  , MOVZxL II16 (OpReg dst_r) (OpReg dst_r)
                  ]
              _ -> unitOL $ TZCNT (intFormat width) (OpReg src_r) dst_r
      return instrs
  else do
      -- The following insn sequence makes sure 'ctz 0' has a defined value.
      -- starting with Haswell, one could use the TZCNT insn instead.
      let format = if width == W8 then II16 else intFormat width
      src_r <- getNewRegNat format
      tmp_r <- getNewRegNat format
      let instrs = code_src src_r `appOL` toOL
               ([ MOVZxL  II8    (OpReg src_r)       (OpReg src_r) | width == W8 ] ++
                [ BSF     format (OpReg src_r)       tmp_r
                , MOV     II32   (OpImm (ImmInt bw)) (OpReg dst_r)
                , CMOV NE format (OpReg tmp_r)       dst_r
                ]) -- NB: We don't need to zero-extend the result for the
                   -- W8/W16 cases because the 'MOV' insn already
                   -- took care of implicitly clearing the upper bits
      return instrs

-- | Copy memory
--
-- Unroll memcpy calls if the number of bytes to copy isn't too large (cf
-- ncgInlineThresholdMemcpy).  Otherwise, call C's memcpy.
--
-- Inlining is only attempted when the size argument is an integer literal.
genMemCpy
  :: BlockId
  -> Int
  -> CmmExpr
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genMemCpy bid align dst src arg_n = do

  let libc_memcpy = genLibCCall bid (fsLit "memcpy") [] [dst,src,arg_n]

  case arg_n of
    CmmLit (CmmInt n _) -> do
      -- try to inline it
      mcode <- genMemCpyInlineMaybe align dst src n
      -- if it didn't inline, call the C function
      case mcode of
        Nothing -> libc_memcpy
        Just c  -> pure c

    -- not a literal size argument: call the C function
    _ -> libc_memcpy

-- | Try to expand a memcpy of @n@ bytes into a straight-line sequence of
-- load/store pairs.
--
-- The width of the main moves is derived from the given alignment, capped at
-- the platform word alignment. Returns 'Nothing' when the estimated
-- instruction count exceeds 'ncgInlineThresholdMemcpy'.
genMemCpyInlineMaybe
  :: Int
  -> CmmExpr
  -> CmmExpr
  -> Integer
  -> NatM (Maybe InstrBlock)
genMemCpyInlineMaybe align dst src n = do
  config <- getConfig
  let
    platform     = ncgPlatform config
    maxAlignment = wordAlignment platform
                   -- only machine word wide MOVs are supported
    effectiveAlignment = min (alignmentOf align) maxAlignment
    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment

  -- The size of each move, in bytes.
  let sizeBytes :: Integer
      sizeBytes = fromIntegral (formatInBytes format)

  -- The number of instructions we will generate (approx). We need 2
  -- instructions per move.
  let insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)

      -- @go dst src tmp i@ copies the last @i@ bytes of the region, i.e.
      -- the bytes starting at offset @n - i@, going through @tmp@.
      go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
      go dst src tmp i
          | i >= sizeBytes =
              unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
              unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
              go dst src tmp (i - sizeBytes)
          -- Deal with remaining bytes.
          | i >= 4 =  -- Will never happen on 32-bit
              unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
              unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
              go dst src tmp (i - 4)
          | i >= 2 =
              unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
              unitOL (MOV    II16  (OpReg tmp) (OpAddr dst_addr)) `appOL`
              go dst src tmp (i - 2)
          | i >= 1 =
              unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
              unitOL (MOV    II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
              go dst src tmp (i - 1)
          | otherwise = nilOL
        where
          src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
                       (ImmInteger (n - i))

          dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
                       (ImmInteger (n - i))

  if insns > fromIntegral (ncgInlineThresholdMemcpy config)
    then pure Nothing
    else do
      code_dst <- getAnyReg dst
      dst_r <- getNewRegNat format
      code_src <- getAnyReg src
      src_r <- getNewRegNat format
      tmp_r <- getNewRegNat format
      pure $ Just $ code_dst dst_r `appOL` code_src src_r `appOL`
                      go dst_r src_r tmp_r (fromInteger n)

-- | Set memory to the given byte
--
-- Unroll memset calls if the number of bytes to copy isn't too large (cf
-- ncgInlineThresholdMemset).  Otherwise, call C's memset.
--
-- Inlining is only attempted when both the byte value and the size are
-- integer literals.
genMemSet
  :: BlockId
  -> Int
  -> CmmExpr
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genMemSet bid align dst arg_c arg_n = do

  let libc_memset = genLibCCall bid (fsLit "memset") [] [dst,arg_c,arg_n]

  case (arg_c,arg_n) of
    (CmmLit (CmmInt c _), CmmLit (CmmInt n _)) -> do
      -- try to inline it
      mcode <- genMemSetInlineMaybe align dst c n
      -- if it didn't inline, call the C function
      case mcode of
        Nothing -> libc_memset
        Just c  -> pure c

    -- not literal size arguments: call the C function
    _ -> libc_memset

-- | Try to expand a memset of @n@ bytes with byte value @c@ into a
-- straight-line sequence of stores.
--
-- The widest store is derived from the given alignment, capped at the
-- platform word alignment. Returns 'Nothing' when the estimated instruction
-- count exceeds 'ncgInlineThresholdMemset'.
--
-- NOTE(review): the replicated constants @c2@/@c4@/@c8@ are built by shifting
-- and or-ing @c@ as given; this presumably relies on @c@ fitting in a single
-- byte -- confirm against callers.
genMemSetInlineMaybe
  :: Int
  -> CmmExpr
  -> Integer
  -> Integer
  -> NatM (Maybe InstrBlock)
genMemSetInlineMaybe align dst c n = do
  config <- getConfig
  let
    platform = ncgPlatform config
    maxAlignment = wordAlignment platform -- only machine word wide MOVs are supported
    effectiveAlignment = min (alignmentOf align) maxAlignment
    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
    -- The byte value replicated to 2, 4 and 8 bytes.
    c2 = c `shiftL` 8 .|. c
    c4 = c2 `shiftL` 16 .|. c2
    c8 = c4 `shiftL` 32 .|. c4

    -- The number of instructions we will generate (approx). We need 1
    -- instructions per move.
    insns = (n + sizeBytes - 1) `div` sizeBytes

    -- The size of each move, in bytes.
    sizeBytes :: Integer
    sizeBytes = fromIntegral (formatInBytes format)

    -- Depending on size returns the widest MOV instruction and its
    -- width.
    gen4 :: AddrMode -> Integer -> (InstrBlock, Integer)
    gen4 addr size
        | size >= 4 =
            (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
        | size >= 2 =
            (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
        | size >= 1 =
            (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
        | otherwise = (nilOL, 0)

    -- Generates a 64-bit wide MOV instruction from REG to MEM.
    gen8 :: AddrMode -> Reg -> InstrBlock
    gen8 addr reg8byte =
      unitOL (MOV format (OpReg reg8byte) (OpAddr addr))

    -- Unrolls memset when the widest MOV is <= 4 bytes.
    go4 :: Reg -> Integer -> InstrBlock
    go4 dst left =
      if left <= 0 then nilOL
      else curMov `appOL` go4 dst (left - curWidth)
      where
        possibleWidth = minimum [left, sizeBytes]
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
        (curMov, curWidth) = gen4 dst_addr possibleWidth

    -- Unrolls memset when the widest MOV is 8 bytes (thus another Reg
    -- argument). Falls back to go4 when all 8 byte moves are
    -- exhausted.
    go8 :: Reg -> Reg -> Integer -> InstrBlock
    go8 dst reg8byte left =
      if possibleWidth >= 8 then
        let curMov = gen8 dst_addr reg8byte
        in  curMov `appOL` go8 dst reg8byte (left - 8)
      else go4 dst left
      where
        possibleWidth = minimum [left, sizeBytes]
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))

  if fromInteger insns > ncgInlineThresholdMemset config
    then pure Nothing
    else do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat format
        if format == II64 && n >= 8
          then do
            -- Materialise the 8-byte pattern in a register once and store
            -- it repeatedly.
            code_imm8byte <- getAnyReg (CmmLit (CmmInt c8 W64))
            imm8byte_r <- getNewRegNat II64
            return $ Just $ code_dst dst_r `appOL`
                              code_imm8byte imm8byte_r `appOL`
                              go8 dst_r imm8byte_r (fromInteger n)
          else
            return $ Just $ code_dst dst_r `appOL`
                              go4 dst_r (fromInteger n)

-- | Move memory by calling C's @memmove@ with @dst@, @src@ and @n@.
-- The alignment argument is ignored.
genMemMove :: BlockId -> p -> CmmActual -> CmmActual -> CmmActual -> NatM InstrBlock
genMemMove bid _align dst src n = do
  -- TODO: generate inline assembly when under a given threshold (similarly to
  -- memcpy and memset)
  genLibCCall bid (fsLit "memmove") [] [dst,src,n]

-- | Compare memory by calling C's @memcmp@ with @dst@, @src@ and @n@,
-- storing the result in @res@. The alignment argument is ignored.
genMemCmp :: BlockId -> p -> CmmFormal -> CmmActual -> CmmActual -> CmmActual -> NatM InstrBlock
genMemCmp bid _align res dst src n = do
  -- TODO: generate inline assembly when under a given threshold (similarly to
  -- memcpy and memset)
  genLibCCall bid (fsLit "memcmp") [res] [dst,src,n]

-- | Emit a data prefetch of the address computed by @src@.
--
-- @n@ is the prefetch level in the C / LLVM convention (0 to 3); any other
-- value is a panic.
genPrefetchData :: Int -> CmmExpr -> NatM (OrdList Instr)
genPrefetchData n src = do
  is32Bit <- is32BitPlatform
  let
    format = archWordFormat is32Bit
    -- need to know what register width for pointers!
    genPrefetch inRegSrc prefetchCTor = do
      code_src <- getAnyReg inRegSrc
      src_r <- getNewRegNat format
      return $ code_src src_r `appOL`
        (unitOL (prefetchCTor  (OpAddr
                    ((AddrBaseIndex (EABaseReg src_r )   EAIndexNone (ImmInt 0))))  ))
        -- prefetch always takes an address

  -- the c / llvm prefetch convention is 0, 1, 2, and 3
  -- the x86 corresponding names are : NTA, 2 , 1, and 0
  case n of
    0 -> genPrefetch src $ PREFETCH NTA  format
    1 -> genPrefetch src $ PREFETCH Lvl2 format
    2 -> genPrefetch src $ PREFETCH Lvl1 format
    3 -> genPrefetch src $ PREFETCH Lvl0 format
    l -> pprPanic "genPrefetchData: unexpected prefetch level" (ppr l)

-- | Reverse the byte order of @src@ into @dst@.
--
-- * 'W64' on a 32-bit platform: the two 32-bit halves are exchanged and each
--   one is byte-swapped.
-- * 'W16': a 32-bit @BSWAP@ followed by a right shift by 16.
-- * otherwise: a single @BSWAP@ at the operand's own format.
genByteSwap :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genByteSwap width dst src = do
  is32Bit <- is32BitPlatform
  let format = intFormat width
  case width of
      W64 | is32Bit -> do
        let Reg64 dst_hi dst_lo = localReg64 dst
        RegCode64 vcode rhi rlo <- iselExpr64 src
        return $ vcode `appOL`
                 toOL [ MOV II32 (OpReg rlo) (OpReg dst_hi),
                        MOV II32 (OpReg rhi) (OpReg dst_lo),
                        BSWAP II32 dst_hi,
                        BSWAP II32 dst_lo ]
      W16 -> do
        let dst_r = getLocalRegReg dst
        code_src <- getAnyReg src
        return $ code_src dst_r `appOL`
                 unitOL (BSWAP II32 dst_r) `appOL`
                 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
      _   -> do
        let dst_r = getLocalRegReg dst
        code_src <- getAnyReg src
        return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)

-- | Reverse the bit order of @src@ into @dst@ via a call to the primitive
-- named by 'bRevLabel' for the given width.
genBitRev :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
genBitRev bid width dst src = do
  -- Here the C implementation (hs_bitrevN) is used as there is no x86
  -- instruction to reverse a word's bit order.
  genPrimCCall bid (bRevLabel width) [dst] [src]

-- | Population count of @src@ into @dst@.
--
-- Uses the @POPCNT@ instruction when SSE4.2 is enabled, and otherwise calls
-- the primitive named by 'popCntLabel' for the given width.
genPopCnt :: BlockId -> Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genPopCnt bid width dst src = do
  config <- getConfig
  let
    platform = ncgPlatform config
    format = intFormat width

  sse4_2Enabled >>= \case

    True -> do
      code_src <- getAnyReg src
      src_r <- getNewRegNat format
      let dst_r = getRegisterReg platform  (CmmLocal dst)
      return $ code_src src_r `appOL`
          (if width == W8 then
               -- The POPCNT instruction doesn't take a r/m8
               unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
               unitOL (POPCNT II16 (OpReg src_r) dst_r)
           else
               unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
          (if width == W8 || width == W16 then
               -- We used a 16-bit destination register above,
               -- so zero-extend
               unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
           else nilOL)

    False ->
      -- generate C call to hs_popcntN in ghc-prim
      -- TODO: we could directly generate the assembly to index popcount_tab
      -- here instead of doing it by calling a C function
      genPrimCCall bid (popCntLabel width) [dst] [src]

-- | Parallel bit deposit of @src@ under @mask@ into @dst@.
--
-- Uses the @PDEP@ instruction when the configured BMI version is at least
-- 'BMI2', and otherwise calls the primitive named by 'pdepLabel' for the
-- given width.
genPdep :: BlockId -> Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genPdep bid width dst src mask = do
  config <- getConfig
  let
    platform = ncgPlatform config
    format = intFormat width

  if ncgBmiVersion config >= Just BMI2
    then do
      code_src  <- getAnyReg src
      code_mask <- getAnyReg mask
      src_r     <- getNewRegNat format
      mask_r    <- getNewRegNat format
      let dst_r = getRegisterReg platform  (CmmLocal dst)
      return $ code_src src_r `appOL` code_mask mask_r `appOL`
          -- PDEP only supports 32-bit and wider operands, so narrower
          -- inputs are zero-extended first.
          ( if width == W8 || width == W16 then
              toOL
                [ MOVZxL format (OpReg src_r ) (OpReg src_r )
                , MOVZxL format (OpReg mask_r) (OpReg mask_r)
                , PDEP   II32 (OpReg mask_r) (OpReg src_r ) dst_r
                , MOVZxL format (OpReg dst_r) (OpReg dst_r) -- Truncate to op width
                ]
            else
              unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)
          )
    else
      -- generate C call to hs_pdepN in ghc-prim
      genPrimCCall bid (pdepLabel width) [dst] [src,mask]

-- | Parallel bit extract of @src@ under @mask@ into @dst@.
--
-- Uses the @PEXT@ instruction when the configured BMI version is at least
-- 'BMI2', and otherwise calls the primitive named by 'pextLabel' for the
-- given width.
genPext :: BlockId -> Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genPext bid width dst src mask = do
  config <- getConfig
  if ncgBmiVersion config >= Just BMI2
    then do
      let format   = intFormat width
      let dst_r    = getLocalRegReg dst
      code_src  <- getAnyReg src
      code_mask <- getAnyReg mask
      src_r     <- getNewRegNat format
      mask_r    <- getNewRegNat format
      return $ code_src src_r `appOL` code_mask mask_r `appOL`
          (if width == W8 || width == W16 then
               -- The PEXT instruction doesn't take a r/m8 or 16
              toOL
                [ MOVZxL format (OpReg src_r ) (OpReg src_r )
                , MOVZxL format (OpReg mask_r) (OpReg mask_r)
                , PEXT   II32   (OpReg mask_r) (OpReg src_r ) dst_r
                , MOVZxL format (OpReg dst_r)  (OpReg dst_r) -- Truncate to op width
                ]
            else
              unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)
          )
    else
      -- generate C call to hs_pextN in ghc-prim
      genPrimCCall bid (pextLabel width) [dst] [src,mask]

-- | Count leading zeroes of @src@ into @dst@.
--
-- * 'W64' on a 32-bit platform: call the primitive named by 'clzLabel'.
-- * configured BMI version at least 'BMI2': use @LZCNT@.
-- * otherwise: a @BSR@ / @CMOV@ / @XOR@ sequence.
genClz :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
genClz bid width dst src = do
  is32Bit <- is32BitPlatform
  config <- getConfig
  if is32Bit && width == W64

    then
      -- Fallback to `hs_clz64` on i386
      genPrimCCall bid (clzLabel width) [dst] [src]

    else do
      code_src <- getAnyReg src
      let dst_r = getLocalRegReg dst
      if ncgBmiVersion config >= Just BMI2
        then do
          src_r <- getNewRegNat (intFormat width)
          return $ appOL (code_src src_r) $ case width of
            W8 -> toOL
                [ MOVZxL II8  (OpReg src_r)       (OpReg src_r) -- zero-extend to 32 bit
                , LZCNT  II32 (OpReg src_r)       dst_r         -- lzcnt with extra 24 zeros
                , SUB    II32 (OpImm (ImmInt 24)) (OpReg dst_r) -- compensate for extra zeros
                ]
            W16 -> toOL
                [ LZCNT  II16 (OpReg src_r) dst_r
                , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) -- zero-extend from 16 bit
                ]
            _ -> unitOL (LZCNT (intFormat width) (OpReg src_r) dst_r)
        else do
          let format = if width == W8 then II16 else intFormat width
          let bw = widthInBits width
          src_r <- getNewRegNat format
          tmp_r <- getNewRegNat format
          -- dst is preloaded with 2*bw-1 and only overwritten by the BSR
          -- result through the conditional move; the final XOR with bw-1
          -- turns a bit index into a leading-zero count (and 2*bw-1 into bw).
          return $ code_src src_r `appOL` toOL
                   ([ MOVZxL   II8    (OpReg src_r) (OpReg src_r) | width == W8 ] ++
                    [ BSR      format (OpReg src_r) tmp_r
                    , MOV      II32   (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
                    , CMOV NE  format (OpReg tmp_r) dst_r
                    , XOR      format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
                    ]) -- NB: We don't need to zero-extend the result for the
                       -- W8/W16 cases because the 'MOV' insn already
                       -- took care of implicitly clearing the upper bits

-- | Word-to-float conversion of @src@ into @dst@.
--
-- No inline instructions are generated: the work is delegated to the
-- out-of-line primop named by 'word2FloatLabel' for the given width, called
-- through 'genPrimCCall' with @dst@ as the only result and @src@ as the only
-- argument.
genWordToFloat :: BlockId -> Width -> CmmFormal -> CmmActual -> NatM InstrBlock
genWordToFloat bid width dst src =
  -- TODO: generate assembly instead
  genPrimCCall bid (word2FloatLabel width) [dst] [src]

-- | Atomic read of a @width@-sized integer from @addr@ into @dst@.
--
-- The load is built by 'intLoadCode' from a plain 'MOV' at the integer
-- format of @width@ and targeted at the register of @dst@.  The memory
-- ordering argument is not inspected: the same code is produced for every
-- ordering and no fence is added here.
genAtomicRead :: Width -> MemoryOrdering -> LocalReg -> CmmExpr -> NatM InstrBlock
genAtomicRead width _mord dst addr = do
  let fmt = intFormat width
  load_code <- intLoadCode (MOV fmt) addr
  return (load_code (getLocalRegReg dst))

-- | Atomic write of @val@ (a @width@-sized integer) to @addr@.
--
-- The store itself comes from 'assignMem_IntCode'.  Depending on the
-- requested ordering an 'MFENCE' is appended after it.  Acquire ordering is
-- rejected with a panic, since it is not meaningful for a write.
genAtomicWrite :: Width -> MemoryOrdering -> CmmExpr -> CmmExpr -> NatM InstrBlock
genAtomicWrite width mord addr val = do
  code <- assignMem_IntCode (intFormat width) addr val
  -- NOTE(review): the three Boolean results below were reconstructed (the
  -- literals were lost from this copy of the file); confirm against the
  -- upstream definition before relying on them.
  let needs_fence = case mord of
        MemOrderSeqCst  -> True
        MemOrderRelease -> True
        MemOrderAcquire -> pprPanic "genAtomicWrite: acquire ordering on write" empty
        MemOrderRelaxed -> False
  return $ if needs_fence then code `snocOL` MFENCE else code

  :: BlockId
  -> Width
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genCmpXchg :: Label
-> Width
-> LocalReg
-> CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genCmpXchg Label
bid Width
width LocalReg
dst CmmExpr
addr CmmExpr
old CmmExpr
new = do
  is32Bit <- NatM Bool
  -- On x86 we don't have enough registers to use cmpxchg with a
  -- complicated addressing mode, so on that architecture we
  -- pre-compute the address first.
  if not (is32Bit && width == W64)
    then do
      let format = Width -> Format
intFormat Width
      Amode amode addr_code <- getSimpleAmode addr
      newval <- getNewRegNat format
      newval_code <- getAnyReg new
      oldval <- getNewRegNat format
      oldval_code <- getAnyReg old
      platform <- getPlatform
      let dst_r    = Platform -> CmmReg -> Reg
getRegisterReg Platform
platform  (LocalReg -> CmmReg
CmmLocal LocalReg
          code     = [Instr] -> InstrBlock
forall a. [a] -> OrdList a
                     [ Format -> Operand -> Operand -> Instr
MOV Format
format (Reg -> Operand
OpReg Reg
oldval) (Reg -> Operand
OpReg Reg
                     , Instr -> Instr
LOCK (Format -> Operand -> Operand -> Instr
format (Reg -> Operand
OpReg Reg
newval) (AddrMode -> Operand
OpAddr AddrMode
                     , Format -> Operand -> Operand -> Instr
MOV Format
format (Reg -> Operand
OpReg Reg
eax) (Reg -> Operand
OpReg Reg
      return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
          `appOL` code
      -- generate C call to hs_cmpxchgN in ghc-prim
      genPrimCCall bid (cmpxchgLabel width) [dst] [addr,old,new]
      -- TODO: implement cmpxchg8b instruction

-- | Atomic exchange: store @value@ at @addr@ and leave the exchanged
-- register contents in @dst@.
--
-- The new value is first copied into the register of @dst@, which is then
-- swapped with the memory operand by a single 'XCHG'.
--
-- Panics for 64-bit operands on 32-bit platforms, which are not supported.
genXchg :: Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genXchg width dst addr value = do
  is32Bit <- is32BitPlatform

  when (is32Bit && width == W64) $
    panic "genXchg: 64bit atomic exchange not supported on 32bit platforms"

  Amode amode addr_code <- getSimpleAmode addr
  (newval, newval_code) <- getSomeReg value
  let format   = intFormat width
  let dst_r    = getLocalRegReg dst
  -- Copy the value into the target register, perform the exchange.
  let code     = toOL
                 [ MOV format (OpReg newval) (OpReg dst_r)
                  -- On X86 xchg implies a lock prefix if we use a memory argument.
                  -- so this is atomic.
                 , XCHG format (OpAddr amode) dst_r
                 ]
  return $ addr_code `appOL` newval_code `appOL` code

-- | Floating-point absolute value of @src@ into @dst@.
--
-- The operand is computed directly into the register of @dst@ and then
-- ANDed with a constant in which every bit except the most significant one
-- is set (@0x7fffffff@ for 'W32', @0x7fffffffffffffff@ for 'W64').  The
-- constant is placed in memory via 'memConstant' and loaded into a fresh
-- temporary first.
--
-- Panics for any width other than 'W32' and 'W64'.
genFloatAbs :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
genFloatAbs width dst src = do
  let
    format = floatFormat width
    -- Named 'mask_lit' rather than 'const' to avoid shadowing the Prelude.
    mask_lit = case width of
      W32 -> CmmInt 0x7fffffff W32
      W64 -> CmmInt 0x7fffffffffffffff W64
      _   -> pprPanic "genFloatAbs: invalid width" (ppr width)
  src_code <- getAnyReg src
  Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes width) mask_lit
  tmp <- getNewRegNat format
  let dst_r = getLocalRegReg dst
  pure $ src_code dst_r `appOL` amode_code `appOL` toOL
           [ MOV format (OpAddr amode) (OpReg tmp)
           , AND format (OpReg tmp) (OpReg dst_r)
           ]

-- | Floating-point square root of @src@ into @dst@.
--
-- The operand is computed straight into the register of @dst@, and a single
-- 'SQRT' at the given format then uses that register as both source and
-- destination.
genFloatSqrt :: Format -> LocalReg -> CmmExpr -> NatM InstrBlock
genFloatSqrt format dst src = do
  let dst_r = getLocalRegReg dst
  src_code <- getAnyReg src
  pure $ src_code dst_r `snocOL` SQRT format (OpReg dst_r) dst_r

  :: Width
  -> (Format -> Operand -> Operand -> Instr)
  -> (Format -> Maybe (Operand -> Operand -> Instr))
  -> Cond
  -> LocalReg
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genAddSubRetCarry :: Width
-> (Format -> Operand -> Operand -> Instr)
-> (Format -> Maybe (Operand -> Operand -> Instr))
-> Cond
-> LocalReg
-> LocalReg
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genAddSubRetCarry Width
width Format -> Operand -> Operand -> Instr
instr Format -> Maybe (Operand -> Operand -> Instr)
mrevinstr Cond
cond LocalReg
res_r LocalReg
res_c CmmExpr
arg_x CmmExpr
arg_y = do
  platform <- NCGConfig -> Platform
ncgPlatform (NCGConfig -> Platform) -> NatM NCGConfig -> NatM Platform
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> NatM NCGConfig
  let format = Width -> Format
intFormat Width
  rCode <- anyReg =<< trivialCode width (instr format)
                        (mrevinstr format) arg_x arg_y
  reg_tmp <- getNewRegNat II8
  let reg_c = Platform -> CmmReg -> Reg
getRegisterReg Platform
platform  (LocalReg -> CmmReg
CmmLocal LocalReg
      reg_r = Platform -> CmmReg -> Reg
getRegisterReg Platform
platform  (LocalReg -> CmmReg
CmmLocal LocalReg
      code = Reg -> InstrBlock
rCode Reg
reg_r InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
             Cond -> Operand -> Instr
cond (Reg -> Operand
OpReg Reg
reg_tmp) InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
             Format -> Operand -> Operand -> Instr
MOVZxL Format
II8 (Reg -> Operand
OpReg Reg
reg_tmp) (Reg -> Operand
OpReg Reg
  return code

  :: Width
  -> LocalReg
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genAddWithCarry :: Width
-> LocalReg -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genAddWithCarry Width
width LocalReg
res_h LocalReg
res_l CmmExpr
arg_x CmmExpr
arg_y = do
  hCode <- HasDebugCallStack => CmmExpr -> NatM (Reg -> InstrBlock)
CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg (CmmLit -> CmmExpr
CmmLit (Integer -> Width -> CmmLit
CmmInt Integer
0 Width
  let format = Width -> Format
intFormat Width
  lCode <- anyReg =<< trivialCode width (ADD_CC format)
                        (Just (ADD_CC format)) arg_x arg_y
  let reg_l = LocalReg -> Reg
getLocalRegReg LocalReg
      reg_h = LocalReg -> Reg
getLocalRegReg LocalReg
      code = Reg -> InstrBlock
hCode Reg
reg_h InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
             Reg -> InstrBlock
lCode Reg
reg_l InstrBlock -> Instr -> InstrBlock
forall a. OrdList a -> a -> OrdList a
             Format -> Operand -> Operand -> Instr
ADC Format
format (Imm -> Operand
OpImm (Integer -> Imm
ImmInteger Integer
0)) (Reg -> Operand
OpReg Reg
  return code

  :: Width
  -> LocalReg
  -> LocalReg
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM (OrdList Instr)
genSignedLargeMul :: Width
-> LocalReg
-> LocalReg
-> LocalReg
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genSignedLargeMul Width
width LocalReg
res_c LocalReg
res_h LocalReg
res_l CmmExpr
arg_x CmmExpr
arg_y = do
  (y_reg, y_code) <- CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem CmmExpr
  x_code <- getAnyReg arg_x
  reg_tmp <- getNewRegNat II8
  let format = Width -> Format
intFormat Width
      reg_h = LocalReg -> Reg
getLocalRegReg LocalReg
      reg_l = LocalReg -> Reg
getLocalRegReg LocalReg
      reg_c = LocalReg -> Reg
getLocalRegReg LocalReg
      code = InstrBlock
y_code InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
             Reg -> InstrBlock
x_code Reg
rax InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
             [Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL [ Format -> Operand -> Instr
IMUL2 Format
format Operand
                  , Format -> Operand -> Operand -> Instr
MOV Format
format (Reg -> Operand
OpReg Reg
rdx) (Reg -> Operand
OpReg Reg
                  , Format -> Operand -> Operand -> Instr
MOV Format
format (Reg -> Operand
OpReg Reg
rax) (Reg -> Operand
OpReg Reg
                  , Cond -> Operand -> Instr
CARRY (Reg -> Operand
OpReg Reg
                  , Format -> Operand -> Operand -> Instr
MOVZxL Format
II8 (Reg -> Operand
OpReg Reg
reg_tmp) (Reg -> Operand
OpReg Reg
  return code

  :: Width
  -> LocalReg
  -> LocalReg
  -> CmmExpr
  -> CmmExpr
  -> NatM (OrdList Instr)
genUnsignedLargeMul :: Width
-> LocalReg -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
genUnsignedLargeMul Width
width LocalReg
res_h LocalReg
res_l CmmExpr
arg_x CmmExpr
arg_y = do
  (y_reg, y_code) <- CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem CmmExpr
  x_code <- getAnyReg arg_x
  let format = Width -> Format
intFormat Width
      reg_h = LocalReg -> Reg
getLocalRegReg LocalReg
      reg_l = LocalReg -> Reg
getLocalRegReg LocalReg
      code = InstrBlock
y_code InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
             Reg -> InstrBlock
x_code Reg
rax InstrBlock -> InstrBlock -> InstrBlock
forall a. OrdList a -> OrdList a -> OrdList a
             [Instr] -> InstrBlock
forall a. [a] -> OrdList a
toOL [Format -> Operand -> Instr
MUL2 Format
format Operand
                   Format -> Operand -> Operand -> Instr
MOV Format
format (Reg -> Operand
OpReg Reg
rdx) (Reg -> Operand
OpReg Reg
                   Format -> Operand -> Operand -> Instr
MOV Format
format (Reg -> Operand
OpReg Reg
rax) (Reg -> Operand
OpReg Reg
  return code

  :: Width
  -> Bool
  -> LocalReg
  -> LocalReg
  -> Maybe CmmExpr
  -> CmmExpr
  -> CmmExpr
  -> NatM InstrBlock
genQuotRem :: Width
-> Bool
-> LocalReg
-> LocalReg
-> Maybe CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genQuotRem Width
width Bool
signed LocalReg
res_q LocalReg
res_r Maybe CmmExpr
m_arg_x_high CmmExpr
arg_x_low CmmExpr
arg_y = do
  case Width
width of
W8 -> do
      -- See Note [DIV/IDIV for bytes]
      let widen :: MachOp
widen | Bool
signed = Width -> Width -> MachOp
MO_SS_Conv Width
W8 Width
                | Bool
otherwise = Width -> Width -> MachOp
MO_UU_Conv Width
W8 Width
          arg_x_low_16 :: CmmExpr
arg_x_low_16 = MachOp -> [CmmExpr] -> CmmExpr
CmmMachOp MachOp
widen [CmmExpr
          arg_y_16 :: CmmExpr
arg_y_16 = MachOp -> [CmmExpr] -> CmmExpr
CmmMachOp MachOp
widen [CmmExpr
          m_arg_x_high_16 :: Maybe CmmExpr
m_arg_x_high_16 = (\CmmExpr
p -> MachOp -> [CmmExpr] -> CmmExpr
CmmMachOp MachOp
widen [CmmExpr
p]) (CmmExpr -> CmmExpr) -> Maybe CmmExpr -> Maybe CmmExpr
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe CmmExpr
-> Bool
-> LocalReg
-> LocalReg
-> Maybe CmmExpr
-> CmmExpr
-> CmmExpr
-> NatM InstrBlock
genQuotRem Width
W16 Bool
signed LocalReg
res_q LocalReg
res_r Maybe CmmExpr
m_arg_x_high_16 CmmExpr
arg_x_low_16 CmmExpr

_ -> do
      let format :: Format
format = Width -> Format
intFormat Width
          reg_q :: Reg
reg_q = LocalReg -> Reg
getLocalRegReg LocalReg
          reg_r :: Reg
reg_r = LocalReg -> Reg
getLocalRegReg LocalReg
          widen :: Instr
widen | Bool
signed    = Format -> Instr
CLTD Format
                | Bool
otherwise = Format -> Operand -> Operand -> Instr
XOR Format
format (Reg -> Operand
OpReg Reg
rdx) (Reg -> Operand
OpReg Reg
          instr :: Format -> Operand -> Instr
instr | Bool
signed    = Format -> Operand -> Instr
                | Bool
otherwise = Format -> Operand -> Instr
      (y_reg, y_code) <- CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem CmmExpr
      x_low_code <- getAnyReg arg_x_low
      x_high_code <- case m_arg_x_high of
                     Just CmmExpr
arg_x_high ->
                         HasDebugCallStack => CmmExpr -> NatM (Reg -> InstrBlock)
CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg CmmExpr
                     Maybe CmmExpr
Nothing ->
                         (Reg -> InstrBlock) -> NatM (Reg -> InstrBlock)
forall a. a -> NatM a
forall (m :: * -> *) a. Monad m => a -> m a
return ((Reg -> InstrBlock) -> NatM (Reg -> InstrBlock))
-> (Reg -> InstrBlock) -> NatM (Reg -> InstrBlock)
forall a b. (a -> b) -> a -> b
$ InstrBlock -> Reg -> InstrBlock
forall a b. a -> b -> a
const (InstrBlock -> Reg -> InstrBlock)
-> InstrBlock -> Reg -> InstrBlock
forall a b. (a -> b) -> a -> b
$ Instr -> InstrBlock
forall a. a -> OrdList a
unitOL Instr
      return $ y_code `appOL`
               x_low_code rax `appOL`
               x_high_code rdx `appOL`
               toOL [instr format y_reg,
                     MOV format (OpReg rax) (OpReg reg_q),
                     MOV format (OpReg rdx) (OpReg reg_r)]