{-
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998

\section[SimplCore]{Driver for simplifying @Core@ programs}
-}

{-# LANGUAGE CPP #-}

module SimplCore ( core2core, simplifyExpr ) where

#include "HsVersions.h"

import DynFlags
import CoreSyn
import HscTypes
import CSE              ( cseProgram )
import Rules            ( mkRuleBase, unionRuleBase,
                          extendRuleBaseList, ruleCheckProgram, addRuleInfo, )
import PprCore          ( pprCoreBindings, pprCoreExpr )
import OccurAnal        ( occurAnalysePgm, occurAnalyseExpr )
import IdInfo
import CoreStats        ( coreBindsSize, coreBindsStats, exprSize )
import CoreUtils        ( mkTicks, stripTicksTop )
import CoreLint         ( endPass, lintPassResult, dumpPassResult,
                          lintAnnots )
import Simplify         ( simplTopBinds, simplExpr, simplRules )
import SimplUtils       ( simplEnvForGHCi, activeRule )
import SimplEnv
import SimplMonad
import CoreMonad
import qualified ErrUtils as Err
import FloatIn          ( floatInwards )
import FloatOut         ( floatOutwards )
import FamInstEnv
import Id
import ErrUtils         ( withTiming )
import BasicTypes       ( CompilerPhase(..), isDefaultInlinePragma )
import VarSet
import VarEnv
import LiberateCase     ( liberateCase )
import SAT              ( doStaticArgs )
import Specialise       ( specProgram)
import SpecConstr       ( specConstrProgram)
import DmdAnal          ( dmdAnalProgram )
import CallArity        ( callArityAnalProgram )
import WorkWrap         ( wwTopBinds )
import Vectorise        ( vectorise )
import SrcLoc
import Util
import Module

import Maybes
import UniqSupply       ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
import Outputable
import Control.Monad

#ifdef GHCI
import DynamicLoading   ( loadPlugins )
import Plugins          ( installCoreToDos )
#else
import DynamicLoading   ( pluginError )
#endif

{-
************************************************************************
*                                                                      *
\subsection{The driver for the simplifier}
*                                                                      *
************************************************************************
-}

core2core :: HscEnv -> ModGuts -> IO ModGuts
core2core hsc_env guts@(ModGuts { mg_module  = mod
                                , mg_loc     = loc
                                , mg_deps    = deps
                                , mg_rdr_env = rdr_env })
  = do { us <- mkSplitUniqSupply 's'
       -- make sure all plugins are loaded

       ; let builtin_passes = getCoreToDo dflags
             orph_mods = mkModuleSet (mod : dep_orphs deps)
       ;
       ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
                                    orph_mods print_unqual loc $
                           do { all_passes <- addPluginPasses builtin_passes
                              ; runCorePasses all_passes guts }

       ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
             "Grand total simplifier statistics"
             (pprSimplCount stats)

       ; return guts2 }
  where
    dflags         = hsc_dflags hsc_env
    home_pkg_rules = hptRules hsc_env (dep_mods deps)
    hpt_rule_base  = mkRuleBase home_pkg_rules
    print_unqual   = mkPrintUnqualified dflags rdr_env
    -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
    -- This is very convienent for the users of the monad (e.g. plugins do not have to
    -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
    -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
    -- would mean our cached value would go out of date.

{-
************************************************************************
*                                                                      *
           Generating the main optimisation pipeline
*                                                                      *
************************************************************************
-}

getCoreToDo :: DynFlags -> [CoreToDo]
getCoreToDo dflags
  = flatten_todos core_todo
  where
    opt_level     = optLevel           dflags
    phases        = simplPhases        dflags
    max_iter      = maxSimplIterations dflags
    rule_check    = ruleCheck          dflags
    call_arity    = gopt Opt_CallArity                    dflags
    strictness    = gopt Opt_Strictness                   dflags
    full_laziness = gopt Opt_FullLaziness                 dflags
    do_specialise = gopt Opt_Specialise                   dflags
    do_float_in   = gopt Opt_FloatIn                      dflags
    cse           = gopt Opt_CSE                          dflags
    spec_constr   = gopt Opt_SpecConstr                   dflags
    liberate_case = gopt Opt_LiberateCase                 dflags
    late_dmd_anal = gopt Opt_LateDmdAnal                  dflags
    static_args   = gopt Opt_StaticArgumentTransformation dflags
    rules_on      = gopt Opt_EnableRewriteRules           dflags
    eta_expand_on = gopt Opt_DoLambdaEtaExpansion         dflags
    ww_on         = gopt Opt_WorkerWrapper                dflags

    maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)

    maybe_strictness_before phase
      = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness

    base_mode = SimplMode { sm_phase      = panic "base_mode"
                          , sm_names      = []
                          , sm_rules      = rules_on
                          , sm_eta_expand = eta_expand_on
                          , sm_inline     = True
                          , sm_case_case  = True }

    simpl_phase phase names iter
      = CoreDoPasses
      $   [ maybe_strictness_before phase
          , CoreDoSimplify iter
                (base_mode { sm_phase = Phase phase
                           , sm_names = names })

          , maybe_rule_check (Phase phase) ]

          -- Vectorisation can introduce a fair few common sub expressions involving
          --  DPH primitives. For example, see the Reverse test from dph-examples.
          --  We need to eliminate these common sub expressions before their definitions
          --  are inlined in phase 2. The CSE introduces lots of  v1 = v2 bindings,
          --  so we also run simpl_gently to inline them.
      ++  (if gopt Opt_Vectorise dflags && phase == 3
            then [CoreCSE, simpl_gently]
            else [])

    vectorisation
      = runWhen (gopt Opt_Vectorise dflags) $
          CoreDoPasses [ simpl_gently, CoreDoVectorisation ]

                -- By default, we have 2 phases before phase 0.

                -- Want to run with inline phase 2 after the specialiser to give
                -- maximum chance for fusion to work before we inline build/augment
                -- in phase 1.  This made a difference in 'ansi' where an
                -- overloaded function wasn't inlined till too late.

                -- Need phase 1 so that build/augment get
                -- inlined.  I found that spectral/hartel/genfft lost some useful
                -- strictness in the function sumcode' if augment is not inlined
                -- before strictness analysis runs
    simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
                                | phase <- [phases, phases-1 .. 1] ]


        -- initial simplify: mk specialiser happy: minimum effort please
    simpl_gently = CoreDoSimplify max_iter
                       (base_mode { sm_phase = InitialPhase
                                  , sm_names = ["Gentle"]
                                  , sm_rules = rules_on   -- Note [RULEs enabled in SimplGently]
                                  , sm_inline = False
                                  , sm_case_case = False })
                          -- Don't do case-of-case transformations.
                          -- This makes full laziness work better

    strictness_pass = if ww_on
                       then [CoreDoStrictness,CoreDoWorkerWrapper]
                       else [CoreDoStrictness]


    -- New demand analyser
    demand_analyser = (CoreDoPasses (
                           strictness_pass ++
                           [simpl_phase 0 ["post-worker-wrapper"] max_iter]
                           ))

    core_todo =
     if opt_level == 0 then
       [ vectorisation
       , CoreDoSimplify max_iter
             (base_mode { sm_phase = Phase 0
                        , sm_names = ["Non-opt simplification"] })
       ]

     else {- opt_level >= 1 -} [

    -- We want to do the static argument transform before full laziness as it
    -- may expose extra opportunities to float things outwards. However, to fix
    -- up the output of the transformation we need at do at least one simplify
    -- after this before anything else
        runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),

        -- We run vectorisation here for now, but we might also try to run
        -- it later
        vectorisation,

        -- initial simplify: mk specialiser happy: minimum effort please
        simpl_gently,

        -- Specialisation is best done before full laziness
        -- so that overloaded functions have all their dictionary lambdas manifest
        runWhen do_specialise CoreDoSpecialising,

        runWhen full_laziness $
           CoreDoFloatOutwards FloatOutSwitches {
                                 floatOutLambdas   = Just 0,
                                 floatOutConstants = True,
                                 floatOutOverSatApps = False },
                -- Was: gentleFloatOutSwitches
                --
                -- I have no idea why, but not floating constants to
                -- top level is very bad in some cases.
                --
                -- Notably: p_ident in spectral/rewrite
                --          Changing from "gentle" to "constantsOnly"
                --          improved rewrite's allocation by 19%, and
                --          made 0.0% difference to any other nofib
                --          benchmark
                --
                -- Not doing floatOutOverSatApps yet, we'll do
                -- that later on when we've had a chance to get more
                -- accurate arity information.  In fact it makes no
                -- difference at all to performance if we do it here,
                -- but maybe we save some unnecessary to-and-fro in
                -- the simplifier.

        simpl_phases,

                -- Phase 0: allow all Ids to be inlined now
                -- This gets foldr inlined before strictness analysis

                -- At least 3 iterations because otherwise we land up with
                -- huge dead expressions because of an infelicity in the
                -- simpifier.
                --      let k = BIG in foldr k z xs
                -- ==>  let k = BIG in letrec go = \xs -> ...(k x).... in go xs
                -- ==>  let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
                -- Don't stop now!
        simpl_phase 0 ["main"] (max max_iter 3),

        runWhen do_float_in CoreDoFloatInwards,
            -- Run float-inwards immediately before the strictness analyser
            -- Doing so pushes bindings nearer their use site and hence makes
            -- them more likely to be strict. These bindings might only show
            -- up after the inlining from simplification.  Example in fulsom,
            -- Csg.calc, where an arg of timesDouble thereby becomes strict.

        runWhen call_arity $ CoreDoPasses
            [ CoreDoCallArity
            , simpl_phase 0 ["post-call-arity"] max_iter
            ],

        runWhen strictness demand_analyser,

        runWhen full_laziness $
           CoreDoFloatOutwards FloatOutSwitches {
                                 floatOutLambdas     = floatLamArgs dflags,
                                 floatOutConstants   = True,
                                 floatOutOverSatApps = True },
                -- nofib/spectral/hartel/wang doubles in speed if you
                -- do full laziness late in the day.  It only happens
                -- after fusion and other stuff, so the early pass doesn't
                -- catch it.  For the record, the redex is
                --        f_el22 (f_el21 r_midblock)


        runWhen cse CoreCSE,
                -- We want CSE to follow the final full-laziness pass, because it may
                -- succeed in commoning up things floated out by full laziness.
                -- CSE used to rely on the no-shadowing invariant, but it doesn't any more

        runWhen do_float_in CoreDoFloatInwards,

        maybe_rule_check (Phase 0),

                -- Case-liberation for -O2.  This should be after
                -- strictness analysis and the simplification which follows it.
        runWhen liberate_case (CoreDoPasses [
            CoreLiberateCase,
            simpl_phase 0 ["post-liberate-case"] max_iter
            ]),         -- Run the simplifier after LiberateCase to vastly
                        -- reduce the possiblility of shadowing
                        -- Reason: see Note [Shadowing] in SpecConstr.hs

        runWhen spec_constr CoreDoSpecConstr,

        maybe_rule_check (Phase 0),

        -- Final clean-up simplification:
        simpl_phase 0 ["final"] max_iter,

        runWhen late_dmd_anal $ CoreDoPasses (
            strictness_pass ++
            [simpl_phase 0 ["post-late-ww"] max_iter]
          ),

        maybe_rule_check (Phase 0)
     ]

    -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
    flatten_todos [] = []
    flatten_todos (CoreDoNothing : rest) = flatten_todos rest
    flatten_todos (CoreDoPasses passes : rest) =
      flatten_todos passes ++ flatten_todos rest
    flatten_todos (todo : rest) = todo : flatten_todos rest

-- Loading plugins

addPluginPasses :: [CoreToDo] -> CoreM [CoreToDo]
#ifndef GHCI
addPluginPasses builtin_passes
  = do { dflags <- getDynFlags
       ; let pluginMods = pluginModNames dflags
       ; unless (null pluginMods) (pluginError pluginMods)
       ; return builtin_passes }
#else
addPluginPasses builtin_passes
  = do { hsc_env <- getHscEnv
       ; named_plugins <- liftIO (loadPlugins hsc_env)
       ; foldM query_plug builtin_passes named_plugins }
  where
    query_plug todos (_, plug, options) = installCoreToDos plug options todos
#endif

{-
************************************************************************
*                                                                      *
                  The CoreToDo interpreter
*                                                                      *
************************************************************************
-}

runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
runCorePasses passes guts
  = foldM do_pass guts passes
  where
    do_pass guts CoreDoNothing = return guts
    do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
    do_pass guts pass
       = withTiming getDynFlags
                    (ppr pass <+> brackets (ppr mod))
                    (const ()) $ do
            { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
            ; endPass pass (mg_binds guts') (mg_rules guts')
            ; return guts' }

    mod = mg_module guts

doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
doCorePass pass@(CoreDoSimplify {})  = {-# SCC "Simplify" #-}
                                       simplifyPgm pass

doCorePass CoreCSE                   = {-# SCC "CommonSubExpr" #-}
                                       doPass cseProgram

doCorePass CoreLiberateCase          = {-# SCC "LiberateCase" #-}
                                       doPassD liberateCase

doCorePass CoreDoFloatInwards        = {-# SCC "FloatInwards" #-}
                                       doPassD floatInwards

doCorePass (CoreDoFloatOutwards f)   = {-# SCC "FloatOutwards" #-}
                                       doPassDUM (floatOutwards f)

doCorePass CoreDoStaticArgs          = {-# SCC "StaticArgs" #-}
                                       doPassU doStaticArgs

doCorePass CoreDoCallArity           = {-# SCC "CallArity" #-}
                                       doPassD callArityAnalProgram

doCorePass CoreDoStrictness          = {-# SCC "NewStranal" #-}
                                       doPassDFM dmdAnalProgram

doCorePass CoreDoWorkerWrapper       = {-# SCC "WorkWrap" #-}
                                       doPassDFU wwTopBinds

doCorePass CoreDoSpecialising        = {-# SCC "Specialise" #-}
                                       specProgram

doCorePass CoreDoSpecConstr          = {-# SCC "SpecConstr" #-}
                                       specConstrProgram

doCorePass CoreDoVectorisation       = {-# SCC "Vectorise" #-}
                                       vectorise

doCorePass CoreDoPrintCore              = observe   printCore
doCorePass (CoreDoRuleCheck phase pat)  = ruleCheckPass phase pat
doCorePass CoreDoNothing                = return
doCorePass (CoreDoPasses passes)        = runCorePasses passes

#ifdef GHCI
doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
#endif

doCorePass pass = pprPanic "doCorePass" (ppr pass)

{-
************************************************************************
*                                                                      *
\subsection{Core pass combinators}
*                                                                      *
************************************************************************
-}

printCore :: DynFlags -> CoreProgram -> IO ()
printCore dflags binds
    = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)

ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
ruleCheckPass current_phase pat guts =
    withTiming getDynFlags
               (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
               (const ()) $ do
    { rb <- getRuleBase
    ; dflags <- getDynFlags
    ; vis_orphs <- getVisibleOrphanMods
    ; liftIO $ log_action dflags dflags NoReason Err.SevDump noSrcSpan
                   defaultDumpStyle
                   (ruleCheckProgram current_phase pat
                      (RuleEnv rb vis_orphs) (mg_binds guts))
    ; return guts }

doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
doPassDUM do_pass = doPassM $ \binds -> do
    dflags <- getDynFlags
    us     <- getUniqueSupplyM
    liftIO $ do_pass dflags us binds

doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))

doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)

doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)

doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
doPassU do_pass = doPassDU (const do_pass)

doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
doPassDFM do_pass guts = do
    dflags <- getDynFlags
    p_fam_env <- getPackageFamInstEnv
    let fam_envs = (p_fam_env, mg_fam_inst_env guts)
    doPassM (liftIO . do_pass dflags fam_envs) guts

doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
doPassDFU do_pass guts = do
    dflags <- getDynFlags
    us     <- getUniqueSupplyM
    p_fam_env <- getPackageFamInstEnv
    let fam_envs = (p_fam_env, mg_fam_inst_env guts)
    doPass (do_pass dflags fam_envs us) guts

-- Most passes return no stats and don't change rules: these combinators
-- let us lift them to the full blown ModGuts+CoreM world
doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
doPassM bind_f guts = do
    binds' <- bind_f (mg_binds guts)
    return (guts { mg_binds = binds' })

doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }

-- Observer passes just peek; don't modify the bindings at all
observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
observe do_pass = doPassM $ \binds -> do
    dflags <- getDynFlags
    _ <- liftIO $ do_pass dflags binds
    return binds

{-
************************************************************************
*                                                                      *
        Gentle simplification
*                                                                      *
************************************************************************
-}

simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
             -> CoreExpr
             -> IO CoreExpr
-- simplifyExpr is called by the driver to simplify an
-- expression typed in at the interactive prompt
--
-- Also used by Template Haskell
simplifyExpr dflags expr
  = withTiming (return dflags) (text "Simplify [expr]") (const ()) $
    do  {
        ; us <-  mkSplitUniqSupply 's'

        ; let sz = exprSize expr

        ; (expr', counts) <- initSmpl dflags emptyRuleEnv
                               emptyFamInstEnvs us sz
                               (simplExprGently (simplEnvForGHCi dflags) expr)

        ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
                  "Simplifier statistics" (pprSimplCount counts)

        ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
                        (pprCoreExpr expr')

        ; return expr'
        }

simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
-- Simplifies an expression
--      does occurrence analysis, then simplification
--      and repeats (twice currently) because one pass
--      alone leaves tons of crud.
-- Used (a) for user expressions typed in at the interactive prompt
--      (b) the LHS and RHS of a RULE
--      (c) Template Haskell splices
--
-- The name 'Gently' suggests that the SimplifierMode is SimplGently,
-- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
-- enforce that; it just simplifies the expression twice

-- It's important that simplExprGently does eta reduction; see
-- Note [Simplifying the left-hand side of a RULE] above.  The
-- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
-- but only if -O is on.

simplExprGently env expr = do
    expr1 <- simplExpr env (occurAnalyseExpr expr)
    simplExpr env (occurAnalyseExpr expr1)

{-
************************************************************************
*                                                                      *
\subsection{The driver for the simplifier}
*                                                                      *
************************************************************************
-}

simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
simplifyPgm pass guts
  = do { hsc_env <- getHscEnv
       ; us <- getUniqueSupplyM
       ; rb <- getRuleBase
       ; liftIOWithCount $
         simplifyPgmIO pass hsc_env us rb guts }

simplifyPgmIO :: CoreToDo
              -> HscEnv
              -> UniqSupply
              -> RuleBase
              -> ModGuts
              -> IO (SimplCount, ModGuts)  -- New bindings

simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
              hsc_env us hpt_rule_base
              guts@(ModGuts { mg_module = this_mod
                            , mg_rdr_env = rdr_env
                            , mg_deps = deps
                            , mg_binds = binds, mg_rules = rules
                            , mg_fam_inst_env = fam_inst_env })
  = do { (termination_msg, it_count, counts_out, guts')
           <- do_iteration us 1 [] binds rules

        ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
                                dopt Opt_D_dump_simpl_stats  dflags)
                  "Simplifier statistics for following pass"
                  (vcat [text termination_msg <+> text "after" <+> ppr it_count
                                              <+> text "iterations",
                         blankLine,
                         pprSimplCount counts_out])

        ; return (counts_out, guts')
    }
  where
    dflags       = hsc_dflags hsc_env
    print_unqual = mkPrintUnqualified dflags rdr_env
    simpl_env    = mkSimplEnv mode
    active_rule  = activeRule simpl_env

    do_iteration :: UniqSupply
                 -> Int          -- Counts iterations
                 -> [SimplCount] -- Counts from earlier iterations, reversed
                 -> CoreProgram  -- Bindings in
                 -> [CoreRule]   -- and orphan rules
                 -> IO (String, Int, SimplCount, ModGuts)

    do_iteration us iteration_no counts_so_far binds rules
        -- iteration_no is the number of the iteration we are
        -- about to begin, with '1' for the first
      | iteration_no > max_iterations   -- Stop if we've run out of iterations
      = WARN( debugIsOn && (max_iterations > 2)
            , hang (text "Simplifier bailing out after" <+> int max_iterations
                    <+> text "iterations"
                    <+> (brackets $ hsep $ punctuate comma $
                         map (int . simplCountN) (reverse counts_so_far)))
                 2 (text "Size =" <+> ppr (coreBindsStats binds)))

                -- Subtract 1 from iteration_no to get the
                -- number of iterations we actually completed
        return ( "Simplifier baled out", iteration_no - 1
               , totalise counts_so_far
               , guts { mg_binds = binds, mg_rules = rules } )

      -- Try and force thunks off the binds; significantly reduces
      -- space usage, especially with -O.  JRS, 000620.
      | let sz = coreBindsSize binds
      , () <- sz `seq` ()     -- Force it
      = do {
                -- Occurrence analysis
           let {   -- Note [Vectorisation declarations and occurrences]
                   -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                   -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
                   -- that the right-hand sides of vectorisation declarations are taken into
                   -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
                   -- that the binders representing variable vectorisation declarations are kept alive.
                   -- (In contrast to automatically vectorised variables, their unvectorised versions
                   -- don't depend on them.)
                 vectVars = mkVarSet $
                              catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
                                        | Vect bndr _ <- mg_vect_decls guts]
                              ++
                              catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
                                        | bndr <- bindersOfBinds binds]
                                        -- FIXME: This second comprehensions is only needed as long as we
                                        --        have vectorised bindings where we get "Could NOT call
                                        --        vectorised from original version".
              ;  (maybeVects, maybeVectVars)
                   = case sm_phase mode of
                       InitialPhase -> (mg_vect_decls guts, vectVars)
                       _            -> ([], vectVars)
               ; tagged_binds = {-# SCC "OccAnal" #-}
                     occurAnalysePgm this_mod active_rule rules
                                     maybeVects maybeVectVars binds
               } ;
           Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
                     (pprCoreBindings tagged_binds);

                -- Get any new rules, and extend the rule base
                -- See Note [Overall plumbing for rules] in Rules.hs
                -- We need to do this regularly, because simplification can
                -- poke on IdInfo thunks, which in turn brings in new rules
                -- behind the scenes.  Otherwise there's a danger we'll simply
                -- miss the rules for Ids hidden inside imported inlinings
           eps <- hscEPS hsc_env ;
           let  { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
                ; rule_base2 = extendRuleBaseList rule_base1 rules
                ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
                ; vis_orphs = this_mod : dep_orphs deps } ;

                -- Simplify the program
           ((binds1, rules1), counts1) <-
             initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
               do { env1 <- {-# SCC "SimplTopBinds" #-}
                            simplTopBinds simpl_env tagged_binds

                      -- Apply the substitution to rules defined in this module
                      -- for imported Ids.  Eg  RULE map my_f = blah
                      -- If we have a substitution my_f :-> other_f, we'd better
                      -- apply it to the rule to, or it'll never match
                  ; rules1 <- simplRules env1 Nothing rules

                  ; return (getFloatBinds env1, rules1) } ;

                -- Stop if nothing happened; don't dump output
           if isZeroSimplCount counts1 then
                return ( "Simplifier reached fixed point", iteration_no
                       , totalise (counts1 : counts_so_far)  -- Include "free" ticks
                       , guts { mg_binds = binds1, mg_rules = rules1 } )
           else do {
                -- Short out indirections
                -- We do this *after* at least one run of the simplifier
                -- because indirection-shorting uses the export flag on *occurrences*
                -- and that isn't guaranteed to be ok until after the first run propagates
                -- stuff from the binding site to its occurrences
                --
                -- ToDo: alas, this means that indirection-shorting does not happen at all
                --       if the simplifier does nothing (not common, I know, but unsavoury)
           let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;

                -- Dump the result of this iteration
           dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
           lintPassResult hsc_env pass binds2 ;

                -- Loop
           do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
           } }
      | otherwise = panic "do_iteration"
      where
        (us1, us2) = splitUniqSupply us

        -- Remember the counts_so_far are reversed
        totalise :: [SimplCount] -> SimplCount
        totalise = foldr (\c acc -> acc `plusSimplCount` c)
                         (zeroSimplCount dflags)

simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"

-------------------
dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
                   -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
dump_end_iteration dflags print_unqual iteration_no counts binds rules
  = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
  where
    mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
            | otherwise                               = Nothing
            -- Show details if Opt_D_dump_simpl_iterations is on

    hdr = text "Simplifier iteration=" <> int iteration_no
    pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
                     , pprSimplCount counts
                     , text "---- End of simplifier counts for" <+> hdr ]

{-
************************************************************************
*                                                                      *
                Shorting out indirections
*                                                                      *
************************************************************************

If we have this:

        x_local = <expression>
        ...bindings...
        x_exported = x_local

where x_exported is exported, and x_local is not, then we replace it with this:

        x_exported = <expression>
        x_local = x_exported
        ...bindings...

Without this we never get rid of the x_exported = x_local thing.  This
save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
makes strictness information propagate better.  This used to happen in
the final phase, but it's tidier to do it here.

Note [Transferring IdInfo]
~~~~~~~~~~~~~~~~~~~~~~~~~~
We want to propagage any useful IdInfo on x_local to x_exported.

STRICTNESS: if we have done strictness analysis, we want the strictness info on
x_local to transfer to x_exported.  Hence the copyIdInfo call.

RULES: we want to *add* any RULES for x_local to x_exported.


Note [Messing up the exported Id's RULES]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We must be careful about discarding (obviously) or even merging the
RULES on the exported Id. The example that went bad on me at one stage
was this one:

    iterate :: (a -> a) -> a -> [a]
        [Exported]
    iterate = iterateList

    iterateFB c f x = x `c` iterateFB c f (f x)
    iterateList f x =  x : iterateList f (f x)
        [Not exported]

    {-# RULES
    "iterate"   forall f x.     iterate f x = build (\c _n -> iterateFB c f x)
    "iterateFB"                 iterateFB (:) = iterateList
     #-}

This got shorted out to:

    iterateList :: (a -> a) -> a -> [a]
    iterateList = iterate

    iterateFB c f x = x `c` iterateFB c f (f x)
    iterate f x =  x : iterate f (f x)

    {-# RULES
    "iterate"   forall f x.     iterate f x = build (\c _n -> iterateFB c f x)
    "iterateFB"                 iterateFB (:) = iterate
     #-}

And now we get an infinite loop in the rule system
        iterate f x -> build (\cn -> iterateFB c f x)
                    -> iterateFB (:) f x
                    -> iterate f x

Old "solution":
        use rule switching-off pragmas to get rid
        of iterateList in the first place

But in principle the user *might* want rules that only apply to the Id
he says.  And inline pragmas are similar
   {-# NOINLINE f #-}
   f = local
   local = <stuff>
Then we do not want to get rid of the NOINLINE.

Hence hasShortableIdinfo.


Note [Rules and indirection-zapping]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Problem: what if x_exported has a RULE that mentions something in ...bindings...?
Then the things mentioned can be out of scope!  Solution
 a) Make sure that in this pass the usage-info from x_exported is
        available for ...bindings...
 b) If there are any such RULES, rec-ify the entire top-level.
    It'll get sorted out next time round

Other remarks
~~~~~~~~~~~~~
If more than one exported thing is equal to a local thing (i.e., the
local thing really is shared), then we do one only:
\begin{verbatim}
        x_local = ....
        x_exported1 = x_local
        x_exported2 = x_local
==>
        x_exported1 = ....

        x_exported2 = x_exported1
\end{verbatim}

We rely on prior eta reduction to simplify things like
\begin{verbatim}
        x_exported = /\ tyvars -> x_local tyvars
==>
        x_exported = x_local
\end{verbatim}
Hence,there's a possibility of leaving unchanged something like this:
\begin{verbatim}
        x_local = ....
        x_exported1 = x_local Int
\end{verbatim}
By the time we've thrown away the types in STG land this
could be eliminated.  But I don't think it's very common
and it's dangerous to do this fiddling in STG land
because we might elminate a binding that's mentioned in the
unfolding for something.

Note [Indirection zapping and ticks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Unfortunately this is another place where we need a special case for
ticks. The following happens quite regularly:

        x_local = <expression>
        x_exported = tick<x> x_local

Which we want to become:

        x_exported =  tick<x> <expression>

As it makes no sense to keep the tick and the expression on separate
bindings. Note however that that this might increase the ticks scoping
over the execution of x_local, so we can only do this for floatable
ticks. More often than not, other references will be unfoldings of
x_exported, and therefore carry the tick anyway.
-}

type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks

shortOutIndirections :: CoreProgram -> CoreProgram
shortOutIndirections binds
  | isEmptyVarEnv ind_env = binds
  | no_need_to_flatten    = binds'                      -- See Note [Rules and indirect-zapping]
  | otherwise             = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
  where
    ind_env            = makeIndEnv binds
    -- These exported Ids are the subjects  of the indirection-elimination
    exp_ids            = map fst $ varEnvElts ind_env
    exp_id_set         = mkVarSet exp_ids
    no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
    binds'             = concatMap zap binds

    zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
    zap (Rec pairs)       = [Rec (concatMap zapPair pairs)]

    zapPair (bndr, rhs)
        | bndr `elemVarSet` exp_id_set = []
        | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
                                       = [(transferIdInfo exp_id bndr,
                                           mkTicks ticks rhs),
                                          (bndr, Var exp_id)]
        | otherwise                    = [(bndr,rhs)]

makeIndEnv :: [CoreBind] -> IndEnv
makeIndEnv binds
  = foldr add_bind emptyVarEnv binds
  where
    add_bind :: CoreBind -> IndEnv -> IndEnv
    add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
    add_bind (Rec pairs)              env = foldr add_pair env pairs

    add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
    add_pair (exported_id, exported) env
        | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
        , shortMeOut env exported_id local_id
        = extendVarEnv env local_id (exported_id, ticks)
    add_pair _ env = env

-----------------
shortMeOut :: IndEnv -> Id -> Id -> Bool
shortMeOut ind_env exported_id local_id
-- The if-then-else stuff is just so I can get a pprTrace to see
-- how often I don't get shorting out because of IdInfo stuff
  = if isExportedId exported_id &&              -- Only if this is exported

       isLocalId local_id &&                    -- Only if this one is defined in this
                                                --      module, so that we *can* change its
                                                --      binding to be the exported thing!

       not (isExportedId local_id) &&           -- Only if this one is not itself exported,
                                                --      since the transformation will nuke it

       not (local_id `elemVarEnv` ind_env)      -- Only if not already substituted for
    then
        if hasShortableIdInfo exported_id
        then True       -- See Note [Messing up the exported Id's IdInfo]
        else WARN( True, text "Not shorting out:" <+> ppr exported_id )
             False
    else
        False

-----------------
hasShortableIdInfo :: Id -> Bool
-- True if there is no user-attached IdInfo on exported_id,
-- so we can safely discard it
-- See Note [Messing up the exported Id's IdInfo]
hasShortableIdInfo id
  =  isEmptyRuleInfo (ruleInfo info)
  && isDefaultInlinePragma (inlinePragInfo info)
  && not (isStableUnfolding (unfoldingInfo info))
  where
     info = idInfo id

-----------------
transferIdInfo :: Id -> Id -> Id
-- See Note [Transferring IdInfo]
-- If we have
--      lcl_id = e; exp_id = lcl_id
-- and lcl_id has useful IdInfo, we don't want to discard it by going
--      gbl_id = e; lcl_id = gbl_id
-- Instead, transfer IdInfo from lcl_id to exp_id
-- Overwriting, rather than merging, seems to work ok.
transferIdInfo exported_id local_id
  = modifyIdInfo transfer exported_id
  where
    local_info = idInfo local_id
    transfer exp_info = exp_info `setStrictnessInfo`    strictnessInfo local_info
                                 `setUnfoldingInfo`     unfoldingInfo local_info
                                 `setInlinePragInfo`    inlinePragInfo local_info
                                 `setRuleInfo`          addRuleInfo (ruleInfo exp_info) new_info
    new_info = setRuleInfoHead (idName exported_id)
                               (ruleInfo local_info)
        -- Remember to set the function-name field of the
        -- rules as we transfer them from one function to another