%
% (c) The AQUA Project, Glasgow University, 1993-1998
%
\section[SimplUtils]{The simplifier utilities}
\begin{code}
module SimplUtils (
mkLam, mkCase, prepareAlts,
preInlineUnconditionally, postInlineUnconditionally,
activeUnfolding, activeUnfInRule, activeRule,
simplEnvForGHCi, simplEnvForRules, updModeForInlineRules,
SimplCont(..), DupFlag(..), ArgInfo(..),
isSimplified,
contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs,
pushSimplifiedArgs, countValArgs, countArgs, addArgTo,
mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
interestingCallContext,
interestingArg, mkArgInfo,
abstractFloats
) where
#include "HsVersions.h"
import SimplEnv
import CoreMonad ( SimplifierMode(..), Tick(..) )
import DynFlags
import StaticFlags
import CoreSyn
import qualified CoreSubst
import PprCore
import CoreFVs
import CoreUtils
import CoreArity
import CoreUnfold
import Name
import Id
import Var ( Var, isCoVar )
import Demand
import SimplMonad
import Type hiding( substTy )
import Coercion ( coercionKind )
import TyCon
import Unify ( dataConCannotMatch )
import VarSet
import BasicTypes
import Util
import MonadUtils
import Outputable
import FastString
import Data.List
\end{code}
%************************************************************************
%* *
The SimplCont type
%* *
%************************************************************************
A SimplCont allows the simplifier to traverse the expression in a
zipper-like fashion. The SimplCont represents the rest of the expression,
"above" the point of interest.
You can also think of a SimplCont as an "evaluation context", using
that term in the way it is used for operational semantics. This is the
way I usually think of it. For example, you'll often see a syntax for
evaluation contexts looking like
C ::= [] | C e | case C of alts | C `cast` co
That's the kind of thing we are doing here, and I use that syntax in
the comments.
Key points:
* A SimplCont describes a *strict* context (just like
evaluation contexts do). E.g. Just [] is not a SimplCont
* A SimplCont describes a context that *does not* bind
any variables. E.g. \x. [] is not a SimplCont
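For example (an illustrative decomposition, not code from this module): when
the simplifier's focus is the sub-expression f inside
        case f x of alts
the surrounding context is represented roughly as
        ApplyTo NoDup x env (Select NoDup b alts env (Stop BoringCtxt))
that is, the evaluation context (case [] x of alts) read from the inside out,
where b is the case binder.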
\begin{code}
data SimplCont
  = Stop                -- An empty context, i.e. []
        CallCtxt        -- Tells how interesting the surrounding context is
                        -- (see interestingCallContext)

  | CoerceIt            -- C `cast` co
        OutCoercion     -- The coercion (already simplified)
        SimplCont

  | ApplyTo             -- C arg
        DupFlag         -- See Note [DupFlag invariants]
        InExpr StaticEnv        -- The argument and its static environment
        SimplCont

  | Select              -- case C of alts
        DupFlag         -- See Note [DupFlag invariants]
        InId [InAlt] StaticEnv  -- The case binder, the alternatives, and their static environment
        SimplCont

  | StrictBind          -- (\x* xs. e) C: a strict binding of x
        InId [InBndr]           -- The strict binder x, plus any further binders xs
        InExpr StaticEnv        -- The body e, with its static environment
        SimplCont

  | StrictArg           -- f e1 .. en C: C is a strict argument of the call
        ArgInfo         -- The function and the arguments supplied so far
        CallCtxt        -- Whether *this* argument position is interesting
        SimplCont
data ArgInfo
  = ArgInfo {
        ai_fun   :: Id,         -- The function being applied
        ai_args  :: [OutExpr],  -- The arguments supplied so far, in *reverse* order
        ai_rules :: [CoreRule], -- Rules for this function
        ai_encl  :: Bool,       -- True <=> this function, or an enclosing one, has rules,
                                --          so be keener to inline in its arguments
                                --          (see interestingArgContext)
        ai_strs  :: [Bool],     -- Strictness of the remaining arguments;
                                --   usually infinite, but if finite the function is
                                --   known to diverge once given that many args
        ai_discs :: [Int]       -- Argument discounts from the function's unfolding
                                --   guidance; always infinite
    }
addArgTo :: ArgInfo -> OutExpr -> ArgInfo
addArgTo ai arg = ai { ai_args = arg : ai_args ai }
instance Outputable SimplCont where
ppr (Stop interesting) = ptext (sLit "Stop") <> brackets (ppr interesting)
ppr (ApplyTo dup arg _ cont) = (ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg) $$ ppr cont
ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
ppr (Select dup bndr alts se cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
(nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
ppr (CoerceIt co cont) = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
data DupFlag = NoDup        -- Unsimplified, might be big
             | Simplified   -- Simplified, but not ok to duplicate
             | OkToDup      -- Simplified and ok to duplicate
isSimplified :: DupFlag -> Bool
isSimplified NoDup = False
isSimplified _ = True
instance Outputable DupFlag where
ppr OkToDup = ptext (sLit "ok")
ppr NoDup = ptext (sLit "nodup")
ppr Simplified = ptext (sLit "simpl")
\end{code}
Note [DupFlag invariants]
~~~~~~~~~~~~~~~~~~~~~~~~~
In both (ApplyTo dup _ env k)
and (Select dup _ _ env k)
the following invariants hold
(a) if dup = OkToDup, then continuation k is also ok-to-dup
(b) if dup = OkToDup or Simplified, the subst-env is empty
    (and hence there is no need to re-simplify)
\begin{code}
mkBoringStop :: SimplCont
mkBoringStop = Stop BoringCtxt
mkRhsStop :: SimplCont
mkRhsStop = Stop (ArgCtxt False)
mkLazyArgStop :: CallCtxt -> SimplCont
mkLazyArgStop cci = Stop cci
contIsRhsOrArg :: SimplCont -> Bool
contIsRhsOrArg (Stop {}) = True
contIsRhsOrArg (StrictBind {}) = True
contIsRhsOrArg (StrictArg {}) = True
contIsRhsOrArg _ = False
contIsDupable :: SimplCont -> Bool
contIsDupable (Stop {}) = True
contIsDupable (ApplyTo OkToDup _ _ _) = True
contIsDupable (Select OkToDup _ _ _ _) = True
contIsDupable (CoerceIt _ cont) = contIsDupable cont
contIsDupable _ = False
contIsTrivial :: SimplCont -> Bool
contIsTrivial (Stop {}) = True
contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
contIsTrivial (CoerceIt _ cont) = contIsTrivial cont
contIsTrivial _ = False
contResultType :: SimplEnv -> OutType -> SimplCont -> OutType
contResultType env ty cont
= go cont ty
where
subst_ty se ty = substTy (se `setInScope` env) ty
go (Stop {}) ty = ty
go (CoerceIt co cont) _ = go cont (snd (coercionKind co))
go (StrictBind _ bs body se cont) _ = go cont (subst_ty se (exprType (mkLams bs body)))
go (StrictArg ai _ cont) _ = go cont (funResultTy (argInfoResultTy ai))
go (Select _ _ alts se cont) _ = go cont (subst_ty se (coreAltsType alts))
go (ApplyTo _ arg se cont) ty = go cont (apply_to_arg ty arg se)
apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
apply_to_arg ty _ _ = funResultTy ty
argInfoResultTy :: ArgInfo -> OutType
argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
= foldr (\arg fn_ty -> applyTypeToArg fn_ty arg) (idType fun) args
countValArgs :: SimplCont -> Int
countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
countValArgs (ApplyTo _ _ _ cont) = 1 + countValArgs cont
countValArgs _ = 0
countArgs :: SimplCont -> Int
countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont
countArgs _ = 0
contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
contArgs cont@(ApplyTo {})
= case go [] cont of { (args, cont') -> (False, args, cont') }
where
go args (ApplyTo _ arg se cont)
| isTypeArg arg = go args cont
| otherwise = go (is_interesting arg se : args) cont
go args cont = (reverse args, cont)
is_interesting arg se = interestingArg (substExpr (text "contArgs") se arg)
contArgs cont = (True, [], cont)
pushSimplifiedArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
pushSimplifiedArgs _env [] cont = cont
pushSimplifiedArgs env (arg:args) cont = ApplyTo Simplified arg env (pushSimplifiedArgs env args cont)
dropArgs :: Int -> SimplCont -> SimplCont
dropArgs 0 cont = cont
dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
dropArgs n other = pprPanic "dropArgs" (ppr n <+> ppr other)
\end{code}
Note [Interesting call context]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We want to avoid inlining an expression where there can't possibly be
any gain, such as in an argument position. Hence, if the continuation
is interesting (eg. a case scrutinee, application etc.) then we
inline, otherwise we don't.
Previously some_benefit used to return True only if the variable was
applied to some value arguments. This didn't work:
let x = _coerce_ (T Int) Int (I# 3) in
case _coerce_ Int (T Int) x of
I# y -> ....
we want to inline x, but can't see that it's a constructor in a case
scrutinee position, and some_benefit is False.
Another example:
dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
.... case dMonadST _@_ x0 of (a,b,c) -> ....
we'd really like to inline dMonadST here, but we *don't* want to
inline if the case expression is just
case x of y { DEFAULT -> ... }
since we can just eliminate this case instead (x is in WHNF). The same
applies when x is bound to a lambda expression. Hence
interestingCallContext looks for case expressions with just a single
default case.
\begin{code}
interestingCallContext :: SimplCont -> CallCtxt
interestingCallContext cont
= interesting cont
where
interesting (Select _ bndr _ _ _)
| isDeadBinder bndr = CaseCtxt
| otherwise = ArgCtxt False
interesting (ApplyTo _ arg _ cont)
| isTypeArg arg = interesting cont
| otherwise = ValAppCtxt
interesting (StrictArg _ cci _) = cci
interesting (StrictBind {}) = BoringCtxt
interesting (Stop cci) = cci
interesting (CoerceIt _ cont) = interesting cont
mkArgInfo :: Id
          -> [CoreRule]  -- Rules for the function
          -> Int         -- Number of value args
          -> SimplCont   -- Context of the call
          -> ArgInfo
mkArgInfo fun rules n_val_args call_cont
| n_val_args < idArity fun
= ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
, ai_encl = False
, ai_strs = vanilla_stricts
, ai_discs = vanilla_discounts }
| otherwise
= ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
, ai_encl = interestingArgContext rules call_cont
, ai_strs = add_type_str (idType fun) arg_stricts
, ai_discs = arg_discounts }
where
vanilla_discounts, arg_discounts :: [Int]
vanilla_discounts = repeat 0
arg_discounts = case idUnfolding fun of
CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
-> discounts ++ vanilla_discounts
_ -> vanilla_discounts
vanilla_stricts, arg_stricts :: [Bool]
vanilla_stricts = repeat False
arg_stricts
= case splitStrictSig (idStrictness fun) of
(demands, result_info)
| not (demands `lengthExceeds` n_val_args)
->
if isBotRes result_info then
map isStrictDmd demands
else
map isStrictDmd demands ++ vanilla_stricts
| otherwise
-> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
<+> ppr n_val_args <+> ppr demands )
vanilla_stricts
add_type_str :: Type -> [Bool] -> [Bool]
add_type_str _ [] = []
add_type_str fun_ty strs
| Just (_, fun_ty') <- splitForAllTy_maybe fun_ty
= add_type_str fun_ty' strs
add_type_str fun_ty (str:strs)
| Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
= (str || isStrictType arg_ty) : add_type_str fun_ty' strs
add_type_str _ strs
= strs
interestingArgContext :: [CoreRule] -> SimplCont -> Bool
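-- An argument position of a call to a function f is "interesting" if f itself
-- has rules, or if the call sits (following the continuation outwards) in the
-- argument position of some enclosing function that has rules (ArgCtxt True).
-- In that case we are keener to inline in those arguments, in the hope of
-- exposing a rule that might then fire.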
interestingArgContext rules call_cont
= notNull rules || enclosing_fn_has_rules
where
enclosing_fn_has_rules = go call_cont
go (Select {}) = False
go (ApplyTo {}) = False
go (StrictArg _ cci _) = interesting cci
go (StrictBind {}) = False
go (CoerceIt _ c) = go c
go (Stop cci) = interesting cci
interesting (ArgCtxt rules) = rules
interesting _ = False
\end{code}
%************************************************************************
%* *
\subsection{Decisions about inlining}
%* *
%************************************************************************
Inlining is controlled partly by the SimplifierMode switch. This has two
settings
   SimplGently     (a) Simplifying before specialiser/full laziness
                   (b) Simplifying inside InlineRules
                   (c) Simplifying the LHS of a rule
                   (d) Simplifying a GHCi expression or Template
                       Haskell splice
   SimplPhase n _  Used at all other times
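As a reference point, here is a sketch of the type as this module
pattern-matches on it; the real definition lives in CoreMonad and is the
authority on the details:
     data SimplifierMode
       = SimplGently { sm_rules :: Bool, sm_inline :: Bool }
       | SimplPhase CompilerPhase [String]   -- phase number, plus a list of names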
Note [Gentle mode]
~~~~~~~~~~~~~~~~~~
Gentle mode has a separate boolean flag to control
a) inlining (sm_inline flag)
b) rules (sm_rules flag)
A key invariant about Gentle mode is that it is treated as the EARLIEST
phase. Something is inlined if the sm_inline flag is on AND the thing
is inlinable in the earliest phase. This is important. Example
g = ...
f x = g (g x)
If we were to inline g into f's inlining, then an importing module would
never be able to do
f e --> g (g e) ---> RULE fires
because the InlineRule for f has had g inlined into it.
On the other hand, it is bad not to do ANY inlining into an
InlineRule, because then recursive knots in instance declarations
don't get unravelled.
However, *sometimes* SimplGently must do no call-site inlining at all
(hence sm_inline = False). Before full laziness we must be careful
not to inline wrappers, because doing so inhibits floating
e.g. ...(case f x of ...)...
==> ...(case (case x of I# x# -> fw x#) of ...)...
==> ...(case x of I# x# -> case fw x# of ...)...
and now the redex (f x) isn't floatable any more.
The no-inlining thing is also important for Template Haskell. You might be
compiling in one-shot mode with -O2; but when TH compiles a splice before
running it, we don't want to use -O2. Indeed, we don't want to inline
anything, because the bytecode interpreter might get confused about
unboxed tuples and suchlike.
Note [RULEs enabled in SimplGently]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RULES are enabled when doing "gentle" simplification. Two reasons:
* We really want the class-op cancellation to happen:
op (df d1 d2) --> $cop3 d1 d2
because this breaks the mutual recursion between 'op' and 'df'
* I wanted the RULE
lift String ===> ...
to work in Template Haskell when simplifying
splices, so we get simpler code for literal strings
But watch out: list fusion can prevent floating. So use phase control
to switch off those rules until after floating.
Note [Simplifying inside InlineRules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We must take care with simplification inside InlineRules (which come from
INLINE pragmas).
First, consider the following example
let f = \pq -> BIG
in
let g = \y -> f y y
in ...g...g...g...g...g...
Now, if that's the ONLY occurrence of f, it might be inlined inside g,
and thence copied multiple times when g is inlined. HENCE we treat
any occurrence in an InlineRule as a multiple occurrence, not a single
one; see OccurAnal.addRuleUsage.
Second, we *do* want to do some modest rules/inlining stuff in InlineRules,
partly to eliminate senseless crap, and partly to break the recursive knots
generated by instance declarations. To keep things simple, we always set
the phase to 'gentle' when processing InlineRules. OK, so suppose we have
        f = <rhs>       -- f's INLINE pragma has activation <act>
meaning "inline f in phases p for which the activation <act>(p) holds".
Then what inlinings/rules can we apply to the copy of <rhs> captured in
f's InlineRule? Our model is that literally <rhs> is substituted for
f when it is inlined. So our conservative plan (implemented by
updModeForInlineRules) is this:
When simplifying the RHS of an InlineRule,
If the InlineRule becomes active in phase p, then
if the current phase is *earlier than* p,
make no inlinings or rules active when simplifying the RHS
otherwise
set the phase to p when simplifying the RHS
That ensures that
a) Rules/inlinings that *cease* being active before p will
not apply to the InlineRule rhs, consistent with it being
inlined in its *original* form in phase p.
b) Rules/inlinings that only become active *after* p will
not apply to the InlineRule rhs, again to be consistent with
inlining the *original* rhs in phase p.
For example,
f x = ...g...
g y = ...
Here we must not inline g into f's RHS, even when we get to phase 0,
because when f is later inlined into some other module we want the call
to g to still be visible there, so that g's own rules and inlining can fire.
Similarly, consider
f x = ...g...
g y = ...
and suppose that there are auto-generated specialisations and a strictness
wrapper for g. The specialisations get activation AlwaysActive, and the
strictness wrapper gets activation (ActiveAfter 0). So the strictness
wrapper fails the test and won't be inlined into f's InlineRule. That
means f can inline, exposing the specialised call to g, so the specialisation
rules can fire.
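Concretely, updModeForInlineRules (below) implements this plan roughly as:
     NeverActive                  -> no rules, no inlining at all
     AlwaysActive, ActiveBefore _ -> gentle mode
     ActiveAfter n                -> SimplPhase n
(A plain INLINE pragma gives AlwaysActive, INLINE [n] gives ActiveAfter n,
and a plain NOINLINE gives NeverActive.)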
A note about wrappers
~~~~~~~~~~~~~~~~~~~~~
It's also important not to inline a worker back into a wrapper.
A wrapper looks like
wrapper = inline_me (\x -> ...worker... )
Normally, the inline_me prevents the worker getting inlined into
the wrapper (initially, the worker's only call site!). But,
if the wrapper is sure to be called, the strictness analyser will
mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
continuation.
\begin{code}
simplEnvForGHCi :: SimplEnv
simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
SimplGently { sm_rules = False, sm_inline = False }
simplEnvForRules :: SimplEnv
simplEnvForRules = mkSimplEnv allOffSwitchChecker $
SimplGently { sm_rules = True, sm_inline = False }
updModeForInlineRules :: Activation -> SimplifierMode -> SimplifierMode
updModeForInlineRules inline_rule_act current_mode
= case inline_rule_act of
NeverActive -> no_op
AlwaysActive -> mk_gentle current_mode
ActiveBefore {} -> mk_gentle current_mode
ActiveAfter n -> mk_phase n current_mode
where
no_op = SimplGently { sm_rules = False, sm_inline = False }
mk_gentle (SimplGently {}) = current_mode
mk_gentle _ = SimplGently { sm_rules = True, sm_inline = True }
mk_phase n (SimplPhase _ ss) = SimplPhase n ss
mk_phase n (SimplGently {}) = SimplPhase n ["gentle-rules"]
\end{code}
preInlineUnconditionally
~~~~~~~~~~~~~~~~~~~~~~~~
@preInlineUnconditionally@ examines a bndr to see if it is used just
once in a completely safe way, so that it is safe to discard the
binding and inline its RHS at the (unique) usage site, REGARDLESS of how
big the RHS might be. If this is the case we don't simplify the RHS
first, but just inline it unsimplified.
This is much better than first simplifying a perhaps-huge RHS and then
inlining and resimplifying it. Indeed, it can be at least quadratically
better. Consider
x1 = e1
x2 = e2[x1]
x3 = e3[x2]
...etc...
xN = eN[xN-1]
We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times, etc.
This can happen with cascades of functions too:
f1 = \x1.e1
f2 = \xs.e2[f1]
f3 = \xs.e3[f2]
...etc...
THE MAIN INVARIANT is this:
IF preInlineUnconditionally chooses to inline x = <rhs>
THEN doing the inlining should not change the occurrence
info for the free vars of <rhs>
For example, it's tempting to look at a trivial binding like
x = y
and inline it unconditionally. But suppose x is used many times,
but this is the unique occurrence of y. Then inlining x would change
y's occurrence info, which breaks the invariant. It matters: y
might have a BIG rhs, which will now be dup'd at every occurrence of x.
Even RHSs labelled InlineMe aren't caught here, because there might be
no benefit from inlining at the call site.
[Sept 01] Don't unconditionally inline a top-level thing, because that
can simply make a static thing into something built dynamically. E.g.
x = (a,b)
main = \s -> h x
[Remember that we treat \s as a one-shot lambda.] No point in
inlining x unless there is something interesting about the call site.
But watch out: if you aren't careful, some useful foldr/build fusion
can be lost (most notably in spectral/hartel/parstof) because the
foldr didn't see the build. Doing the dynamic allocation isn't a big
deal, in fact, but losing the fusion can be. But the right thing here
seems to be to do a callSiteInline based on the fact that there is
something interesting about the call site (it's strict). Hmm. That
seems a bit fragile.
Conclusion: inline top level things gaily until Phase 0 (the last
phase), at which point don't.
Note [pre/postInlineUnconditionally in gentle mode]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Even in gentle mode we want to do preInlineUnconditionally. The
reason is that too little cleanup happens if you don't inline
use-once things. Also a bit of inlining is *good* for full laziness;
it can expose constant sub-expressions. Example in
spectral/mandel/Mandel.hs, where the mandelset function gets a useful
let-float if you inline windowToViewport.
However, as usual for Gentle mode, do not inline things that are
inactive in the initial stages. See Note [Gentle mode].
Note [InlineRule and preInlineUnconditionally]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Surprisingly, we do not pre-inline-unconditionally Ids with INLINE pragmas!
Example
f :: Eq a => a -> a
f x = ...
fInt :: Int -> Int
fInt = f Int dEqInt
...fInt...fInt...fInt...
Here f occurs just once, in the RHS of fInt. But if we inline it there
we'll lose the opportunity to inline at each of fInt's call sites.
The INLINE pragma will only inline when the application is saturated
for exactly this reason; and we don't want PreInlineUnconditionally
to second-guess it. A live example is Trac #3736.
c.f. Note [InlineRule and postInlineUnconditionally]
Note [Top-level bottoming Ids]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Don't inline top-level Ids that are bottoming, even if they are used just
once, because FloatOut has gone to some trouble to extract them out.
Inlining them won't make the program run faster!
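For example (a made-up illustration):
        lvl = error "urk"     -- floated out of f by FloatOut
        f x = if x > 0 then x else lvl
Even though lvl is used just once, inlining it back into f would simply undo
the work FloatOut did in hoisting the call to error out of f's body.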
\begin{code}
preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
preInlineUnconditionally env top_lvl bndr rhs
| not active = False
| isStableUnfolding (idUnfolding bndr) = False
| isTopLevel top_lvl && isBottomingId bndr = False
| opt_SimplNoPreInlining = False
| otherwise = case idOccInfo bndr of
IAmDead -> True
OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
_ -> False
where
phase = getMode env
active = case phase of
SimplGently {} -> isEarlyActive act
SimplPhase n _ -> isActive n act
act = idInlineActivation bndr
try_once in_lam int_cxt
| not in_lam = isNotTopLevel top_lvl || early_phase
| otherwise = int_cxt && canInlineInLam rhs
canInlineInLam (Lit _) = True
canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
canInlineInLam (Note _ e) = canInlineInLam e
canInlineInLam _ = False
early_phase = case phase of
SimplPhase 0 _ -> False
_ -> True
\end{code}
postInlineUnconditionally
~~~~~~~~~~~~~~~~~~~~~~~~~
@postInlineUnconditionally@ decides whether to unconditionally inline
a thing based on the form of its RHS; in particular if it has a
trivial RHS. If so, we can inline and discard the binding altogether.
NB: a loop breaker has must_keep_binding = True, and non-loop-breakers
only have *forward* references. Hence, it's safe to discard the binding.
NOTE: This isn't our last opportunity to inline. We're at the binding
site right now, and we'll get another opportunity when we get to the
occurrence(s).
Note that we do this unconditional inlining only for trivial RHSs.
Don't inline even WHNFs inside lambdas; doing so may simply increase
allocation when the function is called. This isn't the last chance; see
NOTE above.
NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
Because we don't even want to inline them into the RHS of constructor
arguments. See NOTE above
NB: At one time even NOINLINE was ignored here: if the rhs is trivial
it's best to inline it anyway. We often get a=E; b=a from desugaring,
with both a and b marked NOINLINE. But that seems incompatible with
our new view that inlining is like a RULE, so I'm sticking to the 'active'
story for now.
\begin{code}
postInlineUnconditionally
:: SimplEnv -> TopLevelFlag
-> OutId
-> OccInfo
-> OutExpr
-> Unfolding
-> Bool
postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
| not active = False
| isLoopBreaker occ_info = False
| isExportedId bndr = False
| isStableUnfolding unfolding = False
| exprIsTrivial rhs = True
| isTopLevel top_lvl = False
| otherwise
= case occ_info of
OneOcc in_lam _one_br int_cxt
-> smallEnoughToInline unfolding
&& (not in_lam ||
(isCheapUnfolding unfolding && int_cxt))
IAmDead -> True
_ -> False
where
active = case getMode env of
SimplGently {} -> isEarlyActive act
SimplPhase n _ -> isActive n act
act = idInlineActivation bndr
activeUnfolding :: SimplEnv -> IdUnfoldingFun
activeUnfolding env
= case getMode env of
SimplGently { sm_inline = False } -> active_unfolding_minimal
SimplGently { sm_inline = True } -> active_unfolding_gentle
SimplPhase n _ -> active_unfolding n
activeUnfInRule :: SimplEnv -> IdUnfoldingFun
activeUnfInRule env
= case getMode env of
SimplGently { sm_rules = False } -> active_unfolding_minimal
SimplGently { sm_rules = True } -> active_unfolding_gentle
SimplPhase n _ -> active_unfolding n
active_unfolding_minimal :: IdUnfoldingFun
active_unfolding_minimal id
| isCompulsoryUnfolding unf = unf
| otherwise = NoUnfolding
where
unf = realIdUnfolding id
active_unfolding_gentle :: IdUnfoldingFun
active_unfolding_gentle id
| isEarlyActive (idInlineActivation id) = idUnfolding id
| otherwise = NoUnfolding
active_unfolding :: CompilerPhase -> IdUnfoldingFun
active_unfolding n id
| isActive n (idInlineActivation id) = idUnfolding id
| otherwise = NoUnfolding
activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
activeRule dflags env
| not (dopt Opt_EnableRewriteRules dflags)
= Nothing
| otherwise
= case getMode env of
SimplGently { sm_rules = rules_on }
| rules_on -> Just isEarlyActive
| otherwise -> Nothing
SimplPhase n _ -> Just (isActive n)
\end{code}
Note [Top level and postInlineUnconditionally]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We don't do postInlineUnconditionally for top-level things (except ones that
are trivial):
* There is no point, because the main goal is to get rid of local
bindings used in multiple case branches.
* Doing so will inline top-level error expressions that have been
carefully floated out by FloatOut. More generally, it might
replace static allocation with dynamic.
Note [InlineRule and postInlineUnconditionally]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
we lose the unfolding. Example
f = e |> co
Then there's a danger we'll optimise to
f' = e
f = f' |> co
and now postInlineUnconditionally, losing the InlineRule on f. Now f'
won't inline because 'e' is too big.
c.f. Note [InlineRule and preInlineUnconditionally]
%************************************************************************
%* *
Rebuilding a lambda
%* *
%************************************************************************
\begin{code}
mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
mkLam _b [] body
= return body
mkLam _env bndrs body
= do { dflags <- getDOptsSmpl
; mkLam' dflags bndrs body }
where
mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
mkLam' dflags bndrs (Cast body co)
| not (any bad bndrs)
= do { lam <- mkLam' dflags bndrs body
; return (mkCoerce (mkPiTypes bndrs co) lam) }
where
co_vars = tyVarsOfType co
bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
mkLam' dflags bndrs body@(Lam {})
= mkLam' dflags (bndrs ++ bndrs1) body1
where
(bndrs1, body1) = collectBinders body
mkLam' dflags bndrs body
| dopt Opt_DoEtaReduction dflags
, Just etad_lam <- tryEtaReduce bndrs body
= do { tick (EtaReduction (head bndrs))
; return etad_lam }
| dopt Opt_DoLambdaEtaExpansion dflags
, any ok_to_expand bndrs
= do { let body' = etaExpand fun_arity body
fun_arity = exprEtaExpandArity dflags body
; return (mkLams bndrs body') }
| otherwise
= return (mkLams bndrs body)
ok_to_expand :: Var -> Bool
ok_to_expand bndr = isId bndr && not (isDictId bndr)
\end{code}
Note [When to eta expand]
~~~~~~~~~~~~~~~~~~~~~~~~~
We only eta-expand if there is at least one non-tyvar, non-dict
binder. The proximate cause for not eta-expanding dictionary lambdas
was this example:
genMap :: C a => ...
genMap f xs = ...
myMap :: D a => ...
myMap = genMap
Notice that 'genMap' should only inline if applied to two arguments.
In the InlineRule for myMap we'll have the unfolding
(\d -> genMap Int (..d..))
We do not want to eta-expand to
(\d f xs -> genMap Int (..d..) f xs)
because then 'genMap' will inline, and it really shouldn't: at least
as far as the programmer is concerned, it's not applied to two
arguments!
Note [Casts and lambdas]
~~~~~~~~~~~~~~~~~~~~~~~~
Consider
(\x. (\y. e) `cast` g1) `cast` g2
There is a danger here that the two lambdas look separated, and the
full laziness pass might float an expression to between the two.
So this equation in mkLam' floats the g1 out, thus:
(\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
where x:tx.
In general, this floats casts outside lambdas, where (I hope) they
might meet and cancel with some other cast:
\x. e `cast` co ===> (\x. e) `cast` (tx -> co)
/\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
/\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
(if not (g `in` co))
Notice that it works regardless of 'e'. Originally it worked only
if 'e' was itself a lambda, but in some cases that resulted in
fruitless iteration in the simplifier. A good example was when
compiling Text.ParserCombinators.ReadPrec, where we had a definition
like (\x. Get `cast` g)
where Get is a constructor with non-zero arity. Then mkLam eta-expanded
the Get, and the next iteration eta-reduced it, and then eta-expanded
it again.
Note also the side condition for the case of coercion binders.
It does not make sense to transform
/\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
because the latter is not well-kinded.
%************************************************************************
%* *
\subsection{Floating lets out of big lambdas}
%* *
%************************************************************************
Note [Floating and type abstraction]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this:
x = /\a. C e1 e2
We'd like to float this to
y1 = /\a. e1
y2 = /\a. e2
x = /\a. C (y1 a) (y2 a)
for the usual reasons: we want to inline x rather vigorously.
You may think that this kind of thing is rare. But in some programs it is
common. For example, if you do closure conversion you might get:
data a :-> b = forall e. (e -> a -> b) :$ e
f_cc :: forall a. a :-> a
f_cc = /\a. (\e. id a) :$ ()
Now we really want to inline that f_cc thing so that the
construction of the closure goes away.
So I have elaborated simplLazyBind to understand right-hand sides that look
like
/\ a1..an. body
and treat them specially. The real work is done in SimplUtils.abstractFloats,
but there is quite a bit of plumbing in simplLazyBind as well.
The same transformation is good when there are lets in the body:
/\abc -> let(rec) x = e in b
==>
let(rec) x' = /\abc -> let x = x' a b c in e
in
/\abc -> let x = x' a b c in b
This is good because it can turn things like:
let f = /\a -> letrec g = ... g ... in g
into
letrec g' = /\a -> ... g' a ...
in
let f = /\ a -> g' a
which is better. In effect, it means that big lambdas don't impede
let-floating.
This optimisation is CRUCIAL in eliminating the junk introduced by
desugaring mutually recursive definitions. Don't eliminate it lightly!
[May 1999] If we do this transformation *regardless* then we can
end up with some pretty silly stuff. For example,
let
st = /\ s -> let { x1=r1 ; x2=r2 } in ...
in ..
becomes
let y1 = /\s -> r1
y2 = /\s -> r2
st = /\s -> ...[y1 s/x1, y2 s/x2]
in ..
Unless the "..." is a WHNF there is really no point in doing this.
Indeed it can make things worse. Suppose x1 is used strictly,
and is of the form
x1* = case f y of { (a,b) -> e }
If we abstract this wrt the tyvar we then can't do the case inline
as we would normally do.
That's why the whole transformation is part of the same process that
floats let-bindings and constructor arguments out of RHSs. In particular,
it is guarded by the doFloatFromRhs call in simplLazyBind.
\begin{code}
abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
abstractFloats main_tvs body_env body
= ASSERT( notNull body_floats )
do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
where
main_tv_set = mkVarSet main_tvs
body_floats = getFloats body_env
empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
abstract subst (NonRec id rhs)
= do { (poly_id, poly_app) <- mk_poly tvs_here id
; let poly_rhs = mkLams tvs_here rhs'
subst' = CoreSubst.extendIdSubst subst id poly_app
; return (subst', (NonRec poly_id poly_rhs)) }
where
rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
tvs_here | any isCoVar main_tvs = main_tvs
| otherwise
= varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyCoVar rhs')
abstract subst (Rec prs)
= do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
| rhs <- rhss]
; return (subst', Rec (poly_ids `zip` poly_rhss)) }
where
(ids,rhss) = unzip prs
tvs_here = main_tvs
mk_poly tvs_here var
= do { uniq <- getUniqueM
; let poly_name = setNameUnique (idName var) uniq
poly_ty = mkForAllTys tvs_here (idType var)
poly_id = transferPolyIdInfo var tvs_here $
mkLocalId poly_name poly_ty
; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
\end{code}
Note [Abstract over coercions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
type variable a. Rather than sort this mess out, we simply bale out and abstract
wrt all the type variables if any of them are coercion variables.
Historical note: if you use let-bindings instead of a substitution, beware of this:
%************************************************************************
%* *
prepareAlts
%* *
%************************************************************************
prepareAlts tries these things:
1. Eliminate alternatives that cannot match, including the
DEFAULT alternative.
2. If the DEFAULT alternative can match only one possible constructor,
then make that constructor explicit.
e.g.
case e of x { DEFAULT -> rhs }
===>
case e of x { (a,b) -> rhs }
where the type is a single constructor type. This gives better code
when rhs also scrutinises x or e.
3. Returns a list of the constructors that cannot match in the
DEFAULT alternative (if there is one)
Here "cannot match" includes knowledge from GADTs
It's a good idea to do this stuff before simplifying the alternatives, to
avoid simplifying alternatives we know can't happen, and to come up with
the list of constructors that are handled, to put into the IdInfo of the
case binder, for use when simplifying the alternatives.
Eliminating the default alternative in (1) isn't so obvious, but it can
happen:
data Colour = Red | Green | Blue
f x = case x of
Red -> ..
Green -> ..
DEFAULT -> h x
h y = case y of
Blue -> ..
DEFAULT -> [ case y of ... ]
If we inline h into f, the default case of the inlined h can't happen.
If we don't notice this, we may end up filtering out *all* the cases
of the inner case y, which gives us nowhere to go!
\begin{code}
prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
prepareAlts scrut case_bndr' alts
= do { let (alts_wo_default, maybe_deflt) = findDefault alts
alt_cons = [con | (con,_,_) <- alts_wo_default]
imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
; default_alts <- prepareDefault case_bndr' mb_tc_app
imposs_deflt_cons maybe_deflt
; let trimmed_alts = filterOut impossible_alt alts_wo_default
merged_alts = mergeAlts trimmed_alts default_alts
; return (imposs_deflt_cons, merged_alts) }
where
mb_tc_app = splitTyConApp_maybe (idType case_bndr')
Just (_, inst_tys) = mb_tc_app
imposs_cons = case scrut of
Var v -> otherCons (idUnfolding v)
_ -> []
impossible_alt :: CoreAlt -> Bool
impossible_alt (con, _, _) | con `elem` imposs_cons = True
impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con
impossible_alt _ = False
prepareDefault :: OutId
-> Maybe (TyCon, [Type])
-> [AltCon]
-> Maybe InExpr
-> SimplM [InAlt]
prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
  | isAlgTyCon tycon
, not (isNewTyCon tycon)
, Just all_cons <- tyConDataCons_maybe tycon
, not (null all_cons)
, let imposs_data_cons = [con | DataAlt con <- imposs_cons]
impossible con = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
= case filterOut impossible all_cons of
[] -> return []
[con] ->
do { tick (FillInCaseDefault case_bndr)
; us <- getUniquesM
; let (ex_tvs, co_tvs, arg_ids) =
dataConRepInstPat us con inst_tys
; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
_ -> return [(DEFAULT, [], deflt_rhs)]
| debugIsOn, isAlgTyCon tycon
, null (tyConDataCons tycon)
, not (isFamilyTyCon tycon || isAbstractTyCon tycon)
= pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
$ return [(DEFAULT, [], deflt_rhs)]
prepareDefault _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
= return [(DEFAULT, [], deflt_rhs)]
prepareDefault _case_bndr _bndr_ty _imposs_cons Nothing
= return []
\end{code}
%************************************************************************
%* *
mkCase
%* *
%************************************************************************
mkCase tries these things
1. Merge Nested Cases
       case e of b {             ==>   case e of b {
         p1 -> rhs1                      p1 -> rhs1
         ...                             ...
         pm -> rhsm                      pm -> rhsm
         _  -> case b of b' {            pn -> let b'=b in rhsn
                 pn -> rhsn              ...
                 ...                     po -> let b'=b in rhso
                 po -> rhso              _  -> let b'=b in rhsd
                 _  -> rhsd
       }
which merges two cases into one when the case in the DEFAULT branch
scrutinises the same variable (here b) that the outer case binds. This
transformation is called Case Merging; it avoids scrutinising the same
variable multiple times.
2. Eliminate Identity Case
case e of ===> e
True -> True;
False -> False
and similar friends.
3. Merge identical alternatives.
If several alternatives are identical, merge them into
a single DEFAULT alternative. I've occasionally seen this
making a big difference:
       case e of                 =====>   case e of
         C _ -> f x                         D v -> ....v....
         D v -> ....v....                   DEFAULT -> f x
         DEFAULT -> f x
The point is that we merge common RHSs, at least for the DEFAULT case.
[One could do something more elaborate but I've never seen it needed.]
To avoid an expensive test, we just merge branches equal to the *first*
alternative; this picks up the common cases
a) all branches equal
b) some branches equal to the DEFAULT (which occurs first)
The case where the Merge Identical Alternatives transformation showed up
was like this (base/Foreign/C/Err/Error.lhs):
x | p `is` 1 -> e1
| p `is` 2 -> e2
...etc...
where @is@ was something like
p `is` n = p /= (-1) && p == n
This gave rise to a horrible sequence of cases
case p of
(-1) -> $j p
1 -> e1
DEFAULT -> $j p
and similarly in cascade for all the join points!
\begin{code}
mkCase, mkCase1, mkCase2
:: DynFlags
-> OutExpr -> OutId
-> [OutAlt]
-> SimplM OutExpr
mkCase dflags scrut outer_bndr ((DEFAULT, _, deflt_rhs) : outer_alts)
| dopt Opt_CaseMerge dflags
, Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
, inner_scrut_var == outer_bndr
= do { tick (CaseMerge outer_bndr)
; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
(con, args, wrap_rhs rhs)
wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
wrapped_alts | isDeadBinder inner_bndr = inner_alts
| otherwise = map wrap_alt inner_alts
merged_alts = mergeAlts outer_alts wrapped_alts
; mkCase1 dflags scrut outer_bndr merged_alts
}
mkCase dflags scrut bndr alts = mkCase1 dflags scrut bndr alts
mkCase1 _dflags scrut case_bndr alts
| all identity_alt alts
= do { tick (CaseIdentity case_bndr)
; return (re_cast scrut) }
where
identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
check_eq DEFAULT _ (Var v) = v == case_bndr
check_eq (LitAlt lit') _ (Lit lit) = lit == lit'
check_eq (DataAlt con) args rhs = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
|| rhs `cheapEqExpr` Var case_bndr
check_eq _ _ _ = False
arg_tys = map Type (tyConAppArgs (idType case_bndr))
de_cast (Cast e _) = e
de_cast e = e
re_cast scrut = case head alts of
(_,_,Cast _ co) -> Cast scrut co
_ -> scrut
mkCase1 dflags scrut case_bndr ((_con1,bndrs1,rhs1) : con_alts)
| all isDeadBinder bndrs1
, length filtered_alts < length con_alts
= do { tick (AltMerge case_bndr)
; mkCase2 dflags scrut case_bndr alts' }
where
alts' = (DEFAULT, [], rhs1) : filtered_alts
filtered_alts = filter keep con_alts
keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
mkCase1 dflags scrut bndr alts = mkCase2 dflags scrut bndr alts
mkCase2 _dflags scrut bndr alts
= return (Case scrut bndr (coreAltsType alts) alts)
\end{code}
Note [Dead binders]
~~~~~~~~~~~~~~~~~~~~
Note that deadness is maintained by the simplifier, so that it is
accurate after simplification as well as before.
Note [Cascading case merge]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Case merging should cascade in one sweep, because it
happens bottom-up
case e of a {
DEFAULT -> case a of b
DEFAULT -> case b of c {
DEFAULT -> e
A -> ea
B -> eb
C -> ec
==>
case e of a {
DEFAULT -> case a of b
DEFAULT -> let c = b in e
A -> let c = b in ea
B -> eb
C -> ec
==>
case e of a {
DEFAULT -> let b = a in let c = b in e
A -> let b = a in let c = b in ea
B -> let b = a in eb
C -> ec
However here's a tricky case that we still don't catch, and I don't
see how to catch it in one pass:
case x of c1 { I# a1 ->
case a1 of c2 ->
0 -> ...
DEFAULT -> case x of c3 { I# a2 ->
case a2 of ...
After occurrence analysis (and its binder-swap) we get this
case x of c1 { I# a1 ->
let x = c1 in
case a1 of c2 ->
0 -> ...
DEFAULT -> case x of c3 { I# a2 ->
case a2 of ...
When we simplify the inner case x, we'll see that
x=c1=I# a1. So we'll bind a2 to a1, and get
case x of c1 { I# a1 ->
case a1 of c2 ->
0 -> ...
DEFAULT -> case a1 of ...
This is correct, but we can't do a case merge in this sweep
because c2 /= a1. Reason: the binding c1=I# a1 went inwards
without getting changed to c1=I# c2.
I don't think this is worth fixing, even if I knew how. It'll
all come out in the next pass anyway.