%
% (c) The AQUA Project, Glasgow University, 1993-1998
%
\section[SimplUtils]{The simplifier utilities}
\begin{code}
module SimplUtils (
mkLam, mkCase, prepareAlts, bindCaseBndr,
preInlineUnconditionally, postInlineUnconditionally,
activeInline, activeRule, inlineMode,
SimplCont(..), DupFlag(..), ArgInfo(..),
contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs,
countValArgs, countArgs, splitInlineCont,
mkBoringStop, mkLazyArgStop, contIsRhsOrArg,
interestingCallContext, interestingArgContext,
interestingArg, mkArgInfo,
abstractFloats
) where
#include "HsVersions.h"
import SimplEnv
import DynFlags
import StaticFlags
import CoreSyn
import qualified CoreSubst
import PprCore
import CoreFVs
import CoreUtils
import CoreArity ( etaExpand, exprEtaExpandArity )
import CoreUnfold
import Name
import Id
import Var ( isCoVar )
import NewDemand
import SimplMonad
import Type hiding( substTy )
import Coercion ( coercionKind )
import TyCon
import Unify ( dataConCannotMatch )
import VarSet
import BasicTypes
import Util
import MonadUtils
import Outputable
import FastString
import Data.List
\end{code}
%************************************************************************
%* *
The SimplCont type
%* *
%************************************************************************
A SimplCont allows the simplifier to traverse the expression in a
zipper-like fashion.  The SimplCont represents the rest of the expression,
"above" the point of interest.

You can also think of a SimplCont as an "evaluation context", using
that term in the way it is used for operational semantics.  This is the
way I usually think of it.  For example you'll often see a syntax for
evaluation contexts that looks like

    C ::= []  |  C e  |  case C of alts  |  C `cast` co

That's the kind of thing we are doing here, and I use that syntax in
the comments.
Key points:

  * A SimplCont describes a *strict* context (just as
    evaluation contexts do).  E.g.  Just []  is not a SimplCont

  * A SimplCont describes a context that *does not* bind
    any variables.  E.g.  \x. []  is not a SimplCont
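
As a free-standing illustration of the idea (a sketch only: the toy Expr
and Cont types below are invented for this note and are not GHC's real
data types), a zipper-style continuation can be "plugged" with the
expression under focus to recover the whole term:

    -- Toy expression type and a matching evaluation-context type
    data Expr = Var String | App Expr Expr | Case Expr [(String, Expr)]

    data Cont = Hole                            -- []
              | AppCont Expr Cont               -- C e
              | CaseCont [(String, Expr)] Cont  -- case C of alts

    -- Rebuild the whole expression from the focus and its context;
    -- the constructor nearest the hole is applied first
    plug :: Expr -> Cont -> Expr
    plug e Hole              = e
    plug e (AppCont arg k)   = plug (App e arg) k
    plug e (CaseCont alts k) = plug (Case e alts) k

SimplCont plays the role of Cont here, except that it also carries
environments, duplication flags, and strictness information.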
\begin{code}
data SimplCont
  = Stop                        -- An empty context, or hole, []
        CallCtxt                -- Says how interesting the surrounding
                                --   context is (see interestingCallContext)

  | CoerceIt                    -- C `cast` co
        OutCoercion             -- The coercion (already simplified)
        SimplCont

  | ApplyTo                     -- C arg
        DupFlag
        InExpr SimplEnv         -- The argument and its static environment
        SimplCont

  | Select                      -- case C of alts
        DupFlag
        InId [InAlt] SimplEnv   -- The case binder, alternatives, and their environment
        SimplCont

        -- The two strict forms have no DupFlag, because we never duplicate them
  | StrictBind                  -- (\x* \xs. e) C
        InId [InBndr]           --   a strict let  (let x* = [] in e)
        InExpr SimplEnv         --   is the special case with no extra binders
        SimplCont

  | StrictArg                   -- e C  (the hole is a strict argument of e)
        OutExpr                 -- The function applied to the arguments seen so far
        CallCtxt                -- Whether *this* argument position is interesting
        ArgInfo                 -- Rules/strictness/discount info for the remaining arguments
        SimplCont

data ArgInfo
  = ArgInfo {
        ai_rules :: Bool,       -- The call may trigger RULES, so be keener
                                --   to inline in its arguments
        ai_strs  :: [Bool],     -- Strictness flags for the value arguments
        ai_discs :: [Int]       -- Inlining discounts for the value arguments
    }
instance Outputable SimplCont where
ppr (Stop interesting) = ptext (sLit "Stop") <> brackets (ppr interesting)
ppr (ApplyTo dup arg _ cont) = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
) $$ ppr cont
ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
ppr (StrictArg f _ _ cont) = (ptext (sLit "StrictArg") <+> ppr f) $$ ppr cont
ppr (Select dup bndr alts _ cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
(nest 4 (ppr alts)) $$ ppr cont
ppr (CoerceIt co cont) = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
data DupFlag = OkToDup | NoDup
instance Outputable DupFlag where
ppr OkToDup = ptext (sLit "ok")
ppr NoDup = ptext (sLit "nodup")
mkBoringStop :: SimplCont
mkBoringStop = Stop BoringCtxt
mkLazyArgStop :: CallCtxt -> SimplCont
mkLazyArgStop cci = Stop cci
contIsRhsOrArg :: SimplCont -> Bool
contIsRhsOrArg (Stop {}) = True
contIsRhsOrArg (StrictBind {}) = True
contIsRhsOrArg (StrictArg {}) = True
contIsRhsOrArg _ = False
contIsDupable :: SimplCont -> Bool
contIsDupable (Stop {}) = True
contIsDupable (ApplyTo OkToDup _ _ _) = True
contIsDupable (Select OkToDup _ _ _ _) = True
contIsDupable (CoerceIt _ cont) = contIsDupable cont
contIsDupable _ = False
contIsTrivial :: SimplCont -> Bool
contIsTrivial (Stop {}) = True
contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
contIsTrivial (CoerceIt _ cont) = contIsTrivial cont
contIsTrivial _ = False
contResultType :: SimplEnv -> OutType -> SimplCont -> OutType
contResultType env ty cont
= go cont ty
where
subst_ty se ty = substTy (se `setInScope` env) ty
go (Stop {}) ty = ty
go (CoerceIt co cont) _ = go cont (snd (coercionKind co))
go (StrictBind _ bs body se cont) _ = go cont (subst_ty se (exprType (mkLams bs body)))
go (StrictArg fn _ _ cont) _ = go cont (funResultTy (exprType fn))
go (Select _ _ alts se cont) _ = go cont (subst_ty se (coreAltsType alts))
go (ApplyTo _ arg se cont) ty = go cont (apply_to_arg ty arg se)
apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
apply_to_arg ty _ _ = funResultTy ty
countValArgs :: SimplCont -> Int
countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
countValArgs (ApplyTo _ _ _ cont) = 1 + countValArgs cont
countValArgs _ = 0
countArgs :: SimplCont -> Int
countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont
countArgs _ = 0
contArgs :: SimplCont -> ([OutExpr], SimplCont)
contArgs cont = go [] cont
where
go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont
go args cont = (reverse args, cont)
dropArgs :: Int -> SimplCont -> SimplCont
dropArgs 0 cont = cont
dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
dropArgs n other = pprPanic "dropArgs" (ppr n <+> ppr other)
splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
splitInlineCont (ApplyTo dup (Type ty) se c)
| Just (c1, c2) <- splitInlineCont c = Just (ApplyTo dup (Type ty) se c1, c2)
splitInlineCont cont@(Stop {}) = Just (mkBoringStop, cont)
splitInlineCont cont@(StrictBind {}) = Just (mkBoringStop, cont)
splitInlineCont _ = Nothing
\end{code}
Note [Interesting call context]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We want to avoid inlining an expression where there can't possibly be
any gain, such as in an argument position. Hence, if the continuation
is interesting (eg. a case scrutinee, application etc.) then we
inline, otherwise we don't.
Previously some_benefit used to return True only if the variable was
applied to some value arguments. This didn't work:
	let x = _coerce_ (T Int) Int (I# 3) in
	case _coerce_ Int (T Int) x of
		I# y -> ....

we want to inline x, but can't see that it's a constructor in a case
scrutinee position, and some_benefit is False.

Another example:

	dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)

	....  case dMonadST _@_ x0 of (a,b,c) -> ....

we'd really like to inline dMonadST here, but we *don't* want to
inline if the case expression is just

	case x of y { DEFAULT -> ... }

since we can just eliminate this case instead (x is in WHNF).  The same
applies when x is bound to a lambda expression.  Hence
contIsInteresting looks for case expressions with just a single
default case.
\begin{code}
interestingCallContext :: SimplCont -> CallCtxt
interestingCallContext cont
= interesting cont
where
interesting (Select _ bndr _ _ _)
| isDeadBinder bndr = CaseCtxt
| otherwise = ArgCtxt False 2
interesting (ApplyTo _ arg _ cont)
| isTypeArg arg = interesting cont
| otherwise = ValAppCtxt
interesting (StrictArg _ cci _ _) = cci
interesting (StrictBind {}) = BoringCtxt
interesting (Stop cci) = cci
interesting (CoerceIt _ cont) = interesting cont
mkArgInfo :: Id
          -> Int        -- Number of value args
          -> SimplCont  -- Context of the call
          -> ArgInfo
mkArgInfo fun n_val_args call_cont
| n_val_args < idArity fun
= ArgInfo { ai_rules = False
, ai_strs = vanilla_stricts
, ai_discs = vanilla_discounts }
| otherwise
= ArgInfo { ai_rules = interestingArgContext fun call_cont
, ai_strs = add_type_str (idType fun) arg_stricts
, ai_discs = arg_discounts }
where
vanilla_discounts, arg_discounts :: [Int]
vanilla_discounts = repeat 0
arg_discounts = case idUnfolding fun of
CoreUnfolding _ _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
-> discounts ++ vanilla_discounts
_ -> vanilla_discounts
vanilla_stricts, arg_stricts :: [Bool]
vanilla_stricts = repeat False
arg_stricts
= case splitStrictSig (idNewStrictness fun) of
(demands, result_info)
| not (demands `lengthExceeds` n_val_args)
->
if isBotRes result_info then
map isStrictDmd demands
else
map isStrictDmd demands ++ vanilla_stricts
| otherwise
-> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
<+> ppr n_val_args <+> ppr demands )
vanilla_stricts
add_type_str :: Type -> [Bool] -> [Bool]
add_type_str _ [] = []
add_type_str fun_ty strs
| Just (_, fun_ty') <- splitForAllTy_maybe fun_ty
= add_type_str fun_ty' strs
add_type_str fun_ty (str:strs)
| Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
= (str || isStrictType arg_ty) : add_type_str fun_ty' strs
add_type_str _ strs
= strs
interestingArgContext :: Id -> SimplCont -> Bool
interestingArgContext fn call_cont
= idHasRules fn || go call_cont
where
go (Select {}) = False
go (ApplyTo {}) = False
go (StrictArg _ cci _ _) = interesting cci
go (StrictBind {}) = False
go (CoerceIt _ c) = go c
go (Stop cci) = interesting cci
interesting (ArgCtxt rules _) = rules
interesting _ = False
\end{code}
%************************************************************************
%* *
\subsection{Decisions about inlining}
%* *
%************************************************************************
Inlining is controlled partly by the SimplifierMode switch.  This has two
settings:

	SimplGently	(a) Simplifying before specialiser/full laziness
			(b) Simplifying inside INLINE pragma
			(c) Simplifying the LHS of a rule
			(d) Simplifying a GHCi expression or Template
			    Haskell splice

	SimplPhase n _	Used at all other times

The key thing about SimplGently is that it does no call-site inlining.
Before full laziness we must be careful not to inline wrappers,
because doing so inhibits floating
    e.g. ...(case f x of ...)...
    ==> ...(case (case x of I# x# -> fw x#) of ...)...
    ==> ...(case x of I# x# -> case fw x# of ...)...
and now the redex (f x) isn't floatable any more.
The no-inlining thing is also important for Template Haskell.  You might be
compiling in one-shot mode with -O2; but when TH compiles a splice before
running it, we don't want to use -O2.  Indeed, we don't want to inline
anything, because the bytecode interpreter might get confused about
unboxed tuples and suchlike.
INLINE pragmas
~~~~~~~~~~~~~~
SimplGently is also used as the mode to simplify inside an InlineMe note.
\begin{code}
inlineMode :: SimplifierMode
inlineMode = SimplGently
\end{code}
It really is important to switch off inlinings inside such
expressions.  Consider the following example

	let f = \p q -> BIG
	in
	let g = \y -> f y y
	in ...g...g...g...g...g...

Now, if that's the ONLY occurrence of f, it will be inlined inside g,
and thence copied multiple times when g is inlined.
This function may be inlined in other modules, so we
don't want to remove (by inlining) calls to functions that have
specialisations, or that may have transformation rules in an importing
scope.
E.g.
f x = ...g...
and suppose that g is strict *and* has specialisations. If we inline
g's wrapper, we deny f the chance of getting the specialised version
of g when f is inlined at some call site (perhaps in some other
module).
It's also important not to inline a worker back into a wrapper.
A wrapper looks like

	wrapper = inline_me (\x -> ...worker... )

Normally, the inline_me prevents the worker getting inlined into
the wrapper (initially, the worker's only call site!).  But,
if the wrapper is sure to be called, the strictness analyser will
mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
continuation.  That's why the keep_inline predicate returns True for
ArgOf continuations.  It shouldn't do any harm not to dissolve the
inline-me note under these circumstances.
Note that the result is that we do very little simplification
inside an InlineMe.
	all xs = foldr (&&) True xs
	any p = all . map p

Problem: any won't get deforested, and so if it's exported and the
importer doesn't use the inlining, (eg passes it as an arg) then we
won't get deforestation at all.  We haven't solved this problem yet!
preInlineUnconditionally
~~~~~~~~~~~~~~~~~~~~~~~~
@preInlineUnconditionally@ examines a bndr to see if it is used just
once in a completely safe way, so that it is safe to discard the
binding and inline its RHS at the (unique) usage site, REGARDLESS of how
big the RHS might be.  If this is the case we don't simplify the RHS
first, but just inline it unsimplified.

This is much better than first simplifying a perhaps-huge RHS and then
inlining and re-simplifying it.  Indeed, it can be at least quadratically
better.  Consider

	x1 = e1
	x2 = e2[x1]
	x3 = e3[x2]
	...etc...
	xN = eN[xN-1]

We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
This can happen with cascades of functions too:

	f1 = \x1. e1
	f2 = \xs. e2[f1]
	f3 = \xs. e3[f2]
	...etc...
THE MAIN INVARIANT is this:

	IF preInlineUnconditionally chooses to inline x = <rhs>
	THEN doing the inlining should not change the occurrence
	     info for the free vars of <rhs>

For example, it's tempting to look at a trivial binding like
	x = y
and inline it unconditionally.  But suppose x is used many times,
but this is the unique occurrence of y.  Then inlining x would change
y's occurrence info, which breaks the invariant.  It matters: y
might have a BIG rhs, which will now be dup'd at every occurrence of x.

Even RHSs labelled InlineMe aren't caught here, because there might be
no benefit from inlining at the call site.
[Sept 01] Don't unconditionally inline a top-level thing, because that
can simply make a static thing into something built dynamically.  E.g.
	x = (a,b)
	main = \s -> h x

[Remember that we treat \s as a one-shot lambda.]  No point in
inlining x unless there is something interesting about the call site.
But watch out: if you aren't careful, some useful foldr/build fusion
can be lost (most notably in spectral/hartel/parstof) because the
foldr didn't see the build. Doing the dynamic allocation isn't a big
deal, in fact, but losing the fusion can be. But the right thing here
seems to be to do a callSiteInline based on the fact that there is
something interesting about the call site (it's strict). Hmm. That
seems a bit fragile.
Conclusion: inline top level things gaily until Phase 0 (the last
phase), at which point don't.
\begin{code}
preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
preInlineUnconditionally env top_lvl bndr rhs
| not active = False
| opt_SimplNoPreInlining = False
| otherwise = case idOccInfo bndr of
IAmDead -> True
OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
_ -> False
where
phase = getMode env
active = case phase of
SimplGently -> isAlwaysActive act
SimplPhase n _ -> isActive n act
act = idInlineActivation bndr
try_once in_lam int_cxt
| not in_lam = isNotTopLevel top_lvl || early_phase
| otherwise = int_cxt && canInlineInLam rhs
canInlineInLam (Lit _) = True
canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
canInlineInLam (Note _ e) = canInlineInLam e
canInlineInLam _ = False
early_phase = case phase of
SimplPhase 0 _ -> False
_ -> True
\end{code}
postInlineUnconditionally
~~~~~~~~~~~~~~~~~~~~~~~~~
@postInlineUnconditionally@ decides whether to unconditionally inline
a thing based on the form of its RHS; in particular if it has a
trivial RHS. If so, we can inline and discard the binding altogether.
NB: a loop breaker has must_keep_binding = True and non-loop-breakers
only have *forward* references.  Hence, it's safe to discard the binding.

NOTE: This isn't our last opportunity to inline.  We're at the binding
site right now, and we'll get another opportunity when we get to the
occurrence(s).

Note that we do this unconditional inlining only for trivial RHSs.
Don't inline even WHNFs inside lambdas; doing so may simply increase
allocation when the function is called.  This isn't the last chance; see
NOTE above.

NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here.  Why?
Because we don't even want to inline them into the RHS of constructor
arguments.  See NOTE above.
NB: At one time even NOINLINE was ignored here: if the rhs is trivial
it's best to inline it anyway. We often get a=E; b=a from desugaring,
with both a and b marked NOINLINE. But that seems incompatible with
our new view that inlining is like a RULE, so I'm sticking to the 'active'
story for now.
\begin{code}
postInlineUnconditionally
:: SimplEnv -> TopLevelFlag
-> InId
-> OccInfo
-> OutExpr
-> Unfolding
-> Bool
postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
| not active = False
| isLoopBreaker occ_info = False
| isExportedId bndr = False
| exprIsTrivial rhs = True
| otherwise
= case occ_info of
OneOcc in_lam _one_br int_cxt
-> smallEnoughToInline unfolding
&& ((isNotTopLevel top_lvl && not in_lam) ||
(isCheapUnfolding unfolding && int_cxt))
IAmDead -> True
_ -> False
where
active = case getMode env of
SimplGently -> isAlwaysActive act
SimplPhase n _ -> isActive n act
act = idInlineActivation bndr
activeInline :: SimplEnv -> OutId -> Bool
activeInline env id
= case getMode env of
SimplGently -> False
SimplPhase n _ -> isActive n act
where
act = idInlineActivation id
activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
activeRule dflags env
| not (dopt Opt_EnableRewriteRules dflags)
= Nothing
| otherwise
= case getMode env of
SimplGently -> Just isAlwaysActive
SimplPhase n _ -> Just (isActive n)
\end{code}
%************************************************************************
%* *
Rebuilding a lambda
%* *
%************************************************************************
\begin{code}
mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
mkLam _b [] body
= return body
mkLam _env bndrs body
= do { dflags <- getDOptsSmpl
; mkLam' dflags bndrs body }
where
mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
mkLam' dflags bndrs (Cast body co)
| not (any bad bndrs)
= do { lam <- mkLam' dflags bndrs body
; return (mkCoerce (mkPiTypes bndrs co) lam) }
where
co_vars = tyVarsOfType co
bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
mkLam' dflags bndrs body
| dopt Opt_DoEtaReduction dflags,
Just etad_lam <- tryEtaReduce bndrs body
= do { tick (EtaReduction (head bndrs))
; return etad_lam }
| dopt Opt_DoLambdaEtaExpansion dflags,
any isRuntimeVar bndrs
= do { let body' = tryEtaExpansion dflags body
; return (mkLams bndrs body') }
| otherwise
= return (mkLams bndrs body)
\end{code}
Note [Casts and lambdas]
~~~~~~~~~~~~~~~~~~~~~~~~
Consider
	(\x. (\y. e) `cast` g1) `cast` g2

There is a danger here that the two lambdas look separated, and the
full laziness pass might float an expression to between the two.

So this equation in mkLam' floats the g1 out, thus:
	(\x. e `cast` g1)  -->  (\x. e) `cast` (tx -> g1)
where x::tx.

In general, this floats casts outside lambdas, where (I hope) they
might meet and cancel with some other cast:
	\x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
	/\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
	/\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
				 (if not (g `in` co))
Notice that it works regardless of 'e'.  Originally it worked only
if 'e' was itself a lambda, but in some cases that resulted in
fruitless iteration in the simplifier.  A good example was when
compiling Text.ParserCombinators.ReadPrec, where we had a definition
like	(\x. Get `cast` g)
where Get is a constructor with non-zero arity.  Then mkLam eta-expanded
the Get, and the next iteration eta-reduced it, and then eta-expanded
it again.

Note also the side condition for the case of coercion binders.
It does not make sense to transform
	/\g. e `cast` g  ==>  (/\g. e) `cast` (/\g. g)
because the latter is not well-kinded.
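
Purely as a source-level analogy (a hedged sketch: Data.Coerce's coerce is
not the same thing as a Core cast, and Age, f1, f2 are invented names),
applying a representational coercion under a lambda is equivalent to
coercing the whole lambda:

    import Data.Coerce (coerce)

    newtype Age = Age Int

    -- coercion applied under the lambda
    f1 :: Int -> Age
    f1 = \x -> coerce (x + 1)

    -- the "floated" form: coerce the whole lambda instead
    f2 :: Int -> Age
    f2 = coerce (\x -> x + 1 :: Int)

The simplifier does the same move on Core casts, building the function
coercion (tx -> co) with mkPiTypes as in mkLam' above.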
%************************************************************************
%* *
Eta reduction
%* *
%************************************************************************
Note [Eta reduction conditions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We try for eta reduction here, but *only* if we get all the way to a
trivial expression.  We don't want to remove extra lambdas unless we
are going to avoid allocating this thing altogether.

There are some particularly delicate points here:

* Eta reduction is not valid in general:
	\x. bot  /=  bot
  (see the sketch after this list).
  This matters, partly for old-fashioned correctness reasons but,
  worse, getting it wrong can yield a seg fault.  Consider
	f = \x.f x
	h y = case (case y of { True -> f `seq` True; False -> False }) of
		True -> ...; False -> ...

  If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
  says f=bottom, and replaces the (f `seq` True) with just
  (f `cast` unsafe-co).  BUT, as things stand, 'f' got arity 1, and it
  *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands
  the definition again, so that it does not terminate after all.
  Result: seg-fault because the boolean case actually gets a function value.
  See Trac #1947.

  So it's important to do the right thing.
* Note [Arity care]: we need to be careful if we just look at f's
  arity.  Currently (Dec 07), f's arity is visible in its own RHS (see
  Note [Arity robustness] in SimplEnv) so we must *not* trust the
  arity when checking that 'f' is a value.  Otherwise we will
  eta-reduce
	f = \x. f x
  to
	f = f
  Which might change a terminating program (think (f `seq` e)) to a
  non-terminating one.  So we check for being a loop breaker first.

  However for GlobalIds we can look at the arity; and for primops we
  must, since they have no unfolding.

* Regardless of whether 'f' is a value, we always want to
  reduce (/\a -> f a) to f
  This came up in a RULE: foldr (build (/\a -> g a))
  did not match      foldr (build (/\b -> ...something complex...))
  The type checker can insert these eta-expanded versions,
  with both type and dictionary lambdas; hence the slightly
  ad-hoc isDictId

* Never *reduce* arity.  For example
	f = \x y. g x y
  Then if g has arity 1 we don't want to eta-reduce because then
  f's arity would decrease, and that is bad

These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
Alas.
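
The first delicacy, \x. bot /= bot, can be observed directly at the source
level.  A self-contained sketch (bot and main are invented names for this
illustration only):

    import Control.Exception (SomeException, evaluate, try)

    bot :: Int -> Int
    bot = error "bottom"

    main :: IO ()
    main = do
      -- \x -> bot x is a lambda, already a value (WHNF), so seq succeeds
      putStrLn ((\x -> bot x) `seq` "eta-expanded form: seq returned")
      -- bot itself is bottom, so forcing it raises the error
      r <- try (evaluate (bot `seq` "eta-reduced form: never printed"))
      case r :: Either SomeException String of
        Left  e -> putStrLn ("eta-reduced form: " ++ show e)
        Right s -> putStrLn s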
\begin{code}
tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
tryEtaReduce bndrs body
= go (reverse bndrs) body
where
incoming_arity = count isId bndrs
go (b : bs) (App fun arg) | ok_arg b arg = go bs fun
go [] fun | ok_fun fun = Just fun
go _ _ = Nothing
ok_fun (App fun (Type ty))
| not (any (`elemVarSet` tyVarsOfType ty) bndrs)
= ok_fun fun
ok_fun (Var fun_id)
= not (fun_id `elem` bndrs)
&& (ok_fun_id fun_id || all ok_lam bndrs)
ok_fun _fun = False
ok_fun_id fun = fun_arity fun >= incoming_arity
fun_arity fun
| isLocalId fun && isLoopBreaker (idOccInfo fun) = 0
| otherwise = idArity fun
ok_lam v = isTyVar v || isDictId v
ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
\end{code}
%************************************************************************
%* *
Eta expansion
%* *
%************************************************************************
We go for:
   f = \x1..xn -> N   ==>   f = \x1..xn y1..ym -> N y1..ym
                                 (n >= 0)

where (in both cases)

	* The xi can include type variables

	* The yi are all value variables

	* N is a NORMAL FORM (i.e. no redexes anywhere)
	  wanting a suitable number of extra args.

The biggest reason for doing this is for cases like

	f = \x -> case x of
		    True  -> \y -> e1
		    False -> \y -> e2

Here we want to get the lambdas together.  A good example is the nofib
program fibheaps, which gets 25% more allocation if you don't do this
eta-expansion.
We may have to sandwich some coerces between the lambdas
to make the types work. exprEtaExpandArity looks through coerces
when computing arity; and etaExpand adds the coerces as necessary when
actually computing the expansion.
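
As a hedged source-level sketch (f1 and f2 are invented names; the
simplifier does this on Core via etaExpand, not on source), eta-expansion
moves the inner lambdas up so a saturated call need not build an
intermediate function closure first:

    -- Before: f1 True builds and returns a function closure
    f1 :: Bool -> Int -> Int
    f1 = \x -> case x of
                 True  -> \y -> y + 1
                 False -> \y -> y - 1

    -- After eta-expansion: the lambdas are together, so a saturated
    -- call like  f2 True 3  goes straight to the case
    f2 :: Bool -> Int -> Int
    f2 = \x y -> case x of
                   True  -> y + 1
                   False -> y - 1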
\begin{code}
tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
tryEtaExpansion dflags body
= etaExpand fun_arity body
where
fun_arity = exprEtaExpandArity dflags body
\end{code}
%************************************************************************
%* *
\subsection{Floating lets out of big lambdas}
%* *
%************************************************************************
Note [Floating and type abstraction]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this:

	x = /\a. C e1 e2

We'd like to float this to

	y1 = /\a. e1
	y2 = /\a. e2
	x  = /\a. C (y1 a) (y2 a)

for the usual reasons: we want to inline x rather vigorously.

You may think that this kind of thing is rare.  But in some programs it is
common.  For example, if you do closure conversion you might get:

	data a :-> b = forall e. (e -> a -> b) :$ e

	f_cc :: forall a. a :-> a
	f_cc = /\a. (\e. id a) :$ ()

Now we really want to inline that f_cc thing so that the
construction of the closure goes away.
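
(The example is ordinary Haskell once written with the usual extensions;
a hedged source-level rendering, where the LANGUAGE pragmas are my
addition and the lambda stands in for the note's (\e. id a), is:

    {-# LANGUAGE ExistentialQuantification, TypeOperators #-}

    data a :-> b = forall e. (e -> a -> b) :$ e

    f_cc :: a :-> a
    f_cc = (\_e x -> x) :$ ()

Unless f_cc is inlined at its call sites, every use pays for building the
(:$) closure.)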
So I have elaborated simplLazyBind to understand right-hand sides that look
like
	/\ a1..an. body

and treat them specially.  The real work is done in SimplUtils.abstractFloats,
but there is quite a bit of plumbing in simplLazyBind as well.
The same transformation is good when there are lets in the body:

	/\abc -> let(rec) x = e in b
   ==>
	let(rec) x' = /\abc -> let x = x' a b c in e
	in
	/\abc -> let x = x' a b c in b

This is good because it can turn things like:

	let f = /\a -> letrec g = ... g ... in g
into
	letrec g' = /\a -> ... g' a ...
	in
	let f = /\ a -> g' a

which is better.  In effect, it means that big lambdas don't impede
let-floating.

This optimisation is CRUCIAL in eliminating the junk introduced by
desugaring mutually recursive definitions.  Don't eliminate it lightly!
[May 1999]  If we do this transformation *regardless* then we can
end up with some pretty silly stuff.  For example,

	let
	    st = /\ s -> let { x1=r1 ; x2=r2 } in ...
	in ..
becomes
	let y1 = /\s -> r1
	    y2 = /\s -> r2
	    st = /\s -> ...[y1 s/x1, y2 s/x2]
	in ..

Unless the "..." is a WHNF there is really no point in doing this.
Indeed it can make things worse.  Suppose x1 is used strictly,
and is of the form

	x1* = case f y of { (a,b) -> e }

If we abstract this wrt the tyvar we then can't do the case inline
as we would normally do.

That's why the whole transformation is part of the same process that
floats let-bindings and constructor arguments out of RHSs.  In particular,
it is guarded by the doFloatFromRhs call in simplLazyBind.
\begin{code}
abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
abstractFloats main_tvs body_env body
= ASSERT( notNull body_floats )
do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
; return (float_binds, CoreSubst.substExpr subst body) }
where
main_tv_set = mkVarSet main_tvs
body_floats = getFloats body_env
empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
abstract subst (NonRec id rhs)
= do { (poly_id, poly_app) <- mk_poly tvs_here id
; let poly_rhs = mkLams tvs_here rhs'
subst' = CoreSubst.extendIdSubst subst id poly_app
; return (subst', (NonRec poly_id poly_rhs)) }
where
rhs' = CoreSubst.substExpr subst rhs
tvs_here | any isCoVar main_tvs = main_tvs
| otherwise
= varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
abstract subst (Rec prs)
= do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]
; return (subst', Rec (poly_ids `zip` poly_rhss)) }
where
(ids,rhss) = unzip prs
tvs_here = main_tvs
mk_poly tvs_here var
= do { uniq <- getUniqueM
; let poly_name = setNameUnique (idName var) uniq
poly_ty = mkForAllTys tvs_here (idType var)
poly_id = transferPolyIdInfo var tvs_here $
mkLocalId poly_name poly_ty
; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
\end{code}
Note [Abstract over coercions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
type variable a. Rather than sort this mess out, we simply bale out and abstract
wrt all the type variables if any of them are coercion variables.
Historical note: if you use let-bindings instead of a substitution, beware of this:
%************************************************************************
%* *
prepareAlts
%* *
%************************************************************************
prepareAlts tries these things:

1.  If several alternatives are identical, merge them into
    a single DEFAULT alternative.  I've occasionally seen this
    making a big difference:

	case e of		=====>	case e of
	  C _ -> f x			  D v -> ....v....
	  D v -> ....v....		  DEFAULT -> f x
	  DEFAULT -> f x

    The point is that we merge common RHSs, at least for the DEFAULT case.
    [One could do something more elaborate but I've never seen it needed.]

    To avoid an expensive test, we just merge branches equal to the *first*
    alternative; this picks up the common cases
	a) all branches equal
	b) some branches equal to the DEFAULT (which occurs first)
2.  Case merging:

	case e of b {             ==>   case e of b {
	  p1 -> rhs1                      p1 -> rhs1
	  ...                             ...
	  pm -> rhsm                      pm -> rhsm
	  _  -> case b of b' {            pn -> let b'=b in rhsn
		  pn -> rhsn              ...
		  ...                     po -> let b'=b in rhso
		  po -> rhso              _  -> let b'=b in rhsd
		  _  -> rhsd
	}

    which merges the two cases into one when the default alternative of
    the outer case is itself a case on the outer case binder, i.e. it
    scrutinises the same value again.  This transformation is called
    Case Merging.  It avoids scrutinising the same variable multiple times.
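
    A hedged source-level sketch of what case merging achieves (before and
    after are invented names; the real transformation works on Core and
    goes through the outer case binder):

	-- Before: p is scrutinised twice
	before :: Int -> String
	before p = case p of
		     1 -> "one"
		     _ -> case p of
			    2 -> "two"
			    _ -> "other"

	-- After case merging: a single case on p
	after :: Int -> String
	after p = case p of
		    1 -> "one"
		    2 -> "two"
		    _ -> "other"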
The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):

	x | p `is` 1 -> e1
	  | p `is` 2 -> e2
	...etc...

where @is@ was something like

	p `is` n = p /= (-1) && p == n

This gave rise to a horrible sequence of cases

	case p of
	  (-1)    -> $j p
	  1       -> e1
	  DEFAULT -> $j p

and similarly in cascade for all the join points!
Note [Dead binders]
~~~~~~~~~~~~~~~~~~~~
We do this *here*, looking at unsimplified alternatives, because we
have to check that r doesn't mention the variables bound by the
pattern in each alternative, so the binder-info is rather useful.
\begin{code}
prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
prepareAlts env scrut case_bndr' alts
= do { dflags <- getDOptsSmpl
; alts <- combineIdenticalAlts case_bndr' alts
; let (alts_wo_default, maybe_deflt) = findDefault alts
alt_cons = [con | (con,_,_) <- alts_wo_default]
imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app
imposs_deflt_cons maybe_deflt
; let trimmed_alts = filterOut impossible_alt alts_wo_default
merged_alts = mergeAlts trimmed_alts default_alts
; return (imposs_deflt_cons, merged_alts) }
where
mb_tc_app = splitTyConApp_maybe (idType case_bndr')
Just (_, inst_tys) = mb_tc_app
imposs_cons = case scrut of
Var v -> otherCons (idUnfolding v)
_ -> []
impossible_alt :: CoreAlt -> Bool
impossible_alt (con, _, _) | con `elem` imposs_cons = True
impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con
impossible_alt _ = False
combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
| all isDeadBinder bndrs1,
length filtered_alts < length con_alts
= do { tick (AltMerge case_bndr)
; return ((DEFAULT, [], rhs1) : filtered_alts) }
where
filtered_alts = filter keep con_alts
keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
combineIdenticalAlts _ alts = return alts
prepareDefault :: DynFlags
-> SimplEnv
-> OutId
-> Maybe (TyCon, [Type])
-> [AltCon]
-> Maybe InExpr
-> SimplM [InAlt]
prepareDefault dflags env outer_bndr _bndr_ty imposs_cons (Just deflt_rhs)
| dopt Opt_CaseMerge dflags
, Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
, DoneId inner_scrut_var' <- substId env inner_scrut_var
, inner_scrut_var' == outer_bndr
= do { tick (CaseMerge outer_bndr)
; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts,
not (con `elem` imposs_cons) ]
}
prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
  | isAlgTyCon tycon
  , not (isNewTyCon tycon)
, Just all_cons <- tyConDataCons_maybe tycon
, not (null all_cons)
, let imposs_data_cons = [con | DataAlt con <- imposs_cons]
impossible con = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
= case filterOut impossible all_cons of
[] -> return []
[con] ->
do { tick (FillInCaseDefault case_bndr)
; us <- getUniquesM
; let (ex_tvs, co_tvs, arg_ids) =
dataConRepInstPat us con inst_tys
; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
_ -> return [(DEFAULT, [], deflt_rhs)]
| debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
= pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
$ return [(DEFAULT, [], deflt_rhs)]
prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
= return [(DEFAULT, [], deflt_rhs)]
prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons Nothing
= return []
\end{code}
=================================================================================
mkCase tries these things

1.  Eliminate the case altogether if possible

2.  Case-identity:

	case e of	===> e
	  True  -> True;
	  False -> False

    and similar friends.
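
A hedged source-level sketch of the case-identity transformation (idBool
is an invented name; the code below works on Core alternatives):

	-- Before: the case merely rebuilds its scrutinee
	idBool :: Bool -> Bool
	idBool e = case e of
		     True  -> True
		     False -> False

	-- After case-identity, this is just:   idBool e = e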
\begin{code}
mkCase :: OutExpr -> OutId -> [OutAlt]
-> SimplM OutExpr
mkCase scrut case_bndr alts
| all identity_alt alts
= do tick (CaseIdentity case_bndr)
return (re_cast scrut)
where
identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
check_eq DEFAULT _ (Var v) = v == case_bndr
check_eq (LitAlt lit') _ (Lit lit) = lit == lit'
check_eq (DataAlt con) args rhs = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
|| rhs `cheapEqExpr` Var case_bndr
check_eq _ _ _ = False
arg_tys = map Type (tyConAppArgs (idType case_bndr))
de_cast (Cast e _) = e
de_cast e = e
re_cast scrut = case head alts of
(_,_,Cast _ co) -> Cast scrut co
_ -> scrut
mkCase scrut bndr alts = return (Case scrut bndr (coreAltsType alts) alts)
\end{code}
When adding auxiliary bindings for the case binder, it's worth checking if
it's dead, because it often is, and occasionally these mkCase
transformations cascade rather nicely.
\begin{code}
bindCaseBndr :: Id -> CoreExpr -> CoreExpr -> CoreExpr
bindCaseBndr bndr rhs body
| isDeadBinder bndr = body
| otherwise = bindNonRec bndr rhs body
\end{code}