dev-lang/ghc: bump up to 8.0.2, no KEYWORDS or binaries yet

Package-Manager: Portage-2.3.3, Repoman-2.3.1
author: Sergei Trofimovich <slyfox@gentoo.org> 2017-01-16 09:45:51 +0000
committer: Sergei Trofimovich <slyfox@gentoo.org> 2017-01-16 09:50:00 +0000
commit: ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2 (patch)
tree: 3d1badf2e0dca0f46173cb7bf791fe8904100757 /dev-lang/ghc/files
parent: media-video/subliminal: amd64 stable wrt bug #605776 (diff)
download: gentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.tar.gz
gentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.tar.bz2
gentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.zip
5 files changed, 288 insertions, 0 deletions
diff --git a/dev-lang/ghc/files/ghc-8.0.1-limit-jN.patch b/dev-lang/ghc/files/ghc-8.0.1-limit-jN.patch
new file mode 100644
index 000000000000..dc5410da3020
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.1-limit-jN.patch
@@ -0,0 +1,50 @@
+commit 501e05bb1b8974fc8b6c9eee86c87c367e87a211
+Author: Sergei Trofimovich <slyfox@gentoo.org>
+Date:   Tue Aug 30 12:10:47 2016 +0100
+
+    GhcMake: limit Capability count to CPU count in parallel mode
+    
+    In Trac #9221 one of problems using high --jobs=<N>
+    is amount of mutator (or GC) threads we crate.
+    
+    We use userspace spinning-and-yielding (see ACQUIRE_SPIN_LOCK)
+    to acess work stealing queues. In case of
+    N-worker-threads > N-CPUs fraction of time when
+    thread holding spin lock gets descheduled by kernel
+    increases. That causes other threads to waste CPU time
+    before giving up CPU.
+    
+    Signed-off-by: Sergei Trofimovich <siarheit@google.com>
+    
+    Test Plan:
+    ghc --make -j8 and -j80 have comparable sys time
+    on a 8-core system.
+    
+    Reviewers: austin, gintas, bgamari, simonmar
+    
+    Reviewed By: bgamari, simonmar
+    
+    Subscribers: thomie
+    
+    Differential Revision: https://phabricator.haskell.org/D2482
+    
+    GHC Trac Issues: #9221
+
+diff --git a/compiler/main/GhcMake.hs b/compiler/main/GhcMake.hs
+index 9dc43cd..905df63 100644
+--- a/compiler/main/GhcMake.hs
++++ b/compiler/main/GhcMake.hs
+@@ -761,7 +761,12 @@ parUpsweep n_jobs old_hpt stable_mods cleanup sccs = do
+ 
+     let updNumCapabilities = liftIO $ do
+             n_capabilities <- getNumCapabilities
+-            unless (n_capabilities /= 1) $ setNumCapabilities n_jobs
++            n_cpus <- getNumProcessors
++            -- Setting number of capabilities more than
++            -- CPU count usually leads to high userspace
++            -- lock contention. Trac #9221
++            let n_caps = min n_jobs n_cpus
++            unless (n_capabilities /= 1) $ setNumCapabilities n_caps
+             return n_capabilities
+     -- Reset the number of capabilities once the upsweep ends.
+     let resetNumCapabilities orig_n = liftIO $ setNumCapabilities orig_n
diff --git a/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch b/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch
new file mode 100644
index 000000000000..b46e57301782
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch
@@ -0,0 +1,65 @@
+commit bdfc5375f219d6def81effda4e57cb56d01fc917
+Author: Sergei Trofimovich <slyfox@gentoo.org>
+Date:   Tue Aug 30 12:10:54 2016 +0100
+
+    rts: enable parallel GC scan of large (32M+) allocation area
+    
+    Parallel GC does not scan large allocation area (-A)
+    effectively as it does not do work stealing from nursery
+    by default.
+    
+    That leads to large imbalance when only one of threads
+    overflows allocation area: most of GC threads finish
+    quickly (as there is not much to collect) and sit idle
+    waiting while single GC thread finishes scan of single
+    allocation area for that thread.
+    
+    The patch enables work stealing for (equivalent of -qb0)
+    allocation area of -A32M or higher.
+    
+    Tested on a highlighting-kate package from Trac #9221
+    
+    On 8-core machine the difference is around 5% faster
+    of wall-clock time. On 24-core VM the speedup is 20%.
+    
+    Signed-off-by: Sergei Trofimovich <siarheit@google.com>
+    
+    Test Plan: measured wall time and GC parallelism on highlighting-kate build
+    
+    Reviewers: austin, bgamari, erikd, simonmar
+    
+    Reviewed By: bgamari, simonmar
+    
+    Subscribers: thomie
+    
+    Differential Revision: https://phabricator.haskell.org/D2483
+    
+    GHC Trac Issues: #9221
+
+diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
+index fda33f0..7a719b9 100644
+--- a/rts/RtsFlags.c
++++ b/rts/RtsFlags.c
+@@ -237,1 +237,1 @@ void initRtsFlagsDefaults(void)
+-    RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
++    RtsFlags.ParFlags.parGcLoadBalancingGen = ~0u; /* auto, based on -A */
+@@ -1398,2 +1390,19 @@ static void normaliseRtsOpts (void)
+     }
+ 
++#ifdef THREADED_RTS
++    if (RtsFlags.ParFlags.parGcLoadBalancingGen == ~0u) {
++        StgWord alloc_area_bytes
++            = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE;
++
++        // If allocation area is larger that CPU cache
++        // we can finish scanning quicker doing work-stealing
++        // scan. Trac #9221
++        // 32M looks big enough not to fit into L2 cache
++        // of popular modern CPUs.
++        if (alloc_area_bytes >= 32 * 1024 * 1024) {
++            RtsFlags.ParFlags.parGcLoadBalancingGen = 0;
++        } else {
++            RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
++        }
++    }
++#endif
diff --git a/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch b/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch
new file mode 100644
index 000000000000..4752f4482678
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch
@@ -0,0 +1,127 @@
+commit 5efbf0d243984444cf352ad6f0d147e226c64498
+Author: Sergei Trofimovich <slyfox@gentoo.org>
+Date:   Thu Sep 1 17:34:58 2016 +0100
+
+    restore -fmax-worker-args handling (Trac #11565)
+    
+    maxWorkerArgs handling was accidentally lost 3 years ago
+    in a major update of demand analysis
+        commit 0831a12ea2fc73c33652eeec1adc79fa19700578
+    
+    Old regression is noticeable as:
+    - code bloat (requires stack reshuffling)
+    - compilation slowdown (more code to optimise/generate)
+    - and increased heap usage (DynFlags unboxing/reboxing?)
+    
+    On a simple compile benchmark this change causes heap
+    allocation drop from 70G don to 67G (ghc perf build).
+    
+    Signed-off-by: Sergei Trofimovich <siarheit@google.com>
+    
+    Reviewers: simonpj, ezyang, goldfire, austin, bgamari
+    
+    Reviewed By: simonpj, ezyang
+    
+    Subscribers: thomie
+    
+    Differential Revision: https://phabricator.haskell.org/D2503
+    
+    GHC Trac Issues: #11565
+
+diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
+index 10d5614..7166f57 100644
+--- a/compiler/specialise/SpecConstr.hs
++++ b/compiler/specialise/SpecConstr.hs
+@@ -29,7 +29,7 @@ import CoreFVs          ( exprsFreeVarsList )
+ import CoreMonad
+ import Literal          ( litIsLifted )
+ import HscTypes         ( ModGuts(..) )
+-import WwLib            ( mkWorkerArgs )
++import WwLib            ( isWorkerSmallEnough, mkWorkerArgs )
+ import DataCon
+ import Coercion         hiding( substCo )
+ import Rules
+@@ -1533,10 +1533,14 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
+ 
+   | Just all_calls <- lookupVarEnv bind_calls fn
+   = -- pprTrace "specialise entry {" (ppr fn <+> ppr (length all_calls)) $
+-    do  { (boring_call, pats) <- callsToPats env specs arg_occs all_calls
+-
++    do  { (boring_call, all_pats) <- callsToPats env specs arg_occs all_calls
+                 -- Bale out if too many specialisations
+-        ; let n_pats      = length pats
++        ; let pats = filter (is_small_enough . fst) all_pats
++              is_small_enough vars = isWorkerSmallEnough (sc_dflags env) vars
++                  -- We are about to construct w/w pair in 'spec_one'.
++                  -- Omit specialisation leading to high arity workers.
++                  -- See Note [Limit w/w arity]
++              n_pats      = length pats
+               spec_count' = n_pats + spec_count
+         ; case sc_count env of
+             Just max | not (sc_force env) && spec_count' > max
+diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs
+index 09bc204..d9460d9 100644
+--- a/compiler/stranal/WwLib.hs
++++ b/compiler/stranal/WwLib.hs
+@@ -8,6 +8,7 @@
+ 
+ module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs
+              , deepSplitProductType_maybe, findTypeShape
++             , isWorkerSmallEnough
+  ) where
+ 
+ #include "HsVersions.h"
+@@ -144,7 +145,8 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots
+               wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var
+               worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args
+ 
+-        ; if useful1 && not (only_one_void_argument) || useful2
++        ; if isWorkerSmallEnough dflags work_args
++             && (useful1 && not only_one_void_argument || useful2)
+           then return (Just (worker_args_dmds, wrapper_body, worker_body))
+           else return Nothing
+         }
+@@ -165,6 +167,12 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots
+       | otherwise
+       = False
+ 
++-- See Note [Limit w/w arity]
++isWorkerSmallEnough :: DynFlags -> [Var] -> Bool
++isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags
++    -- We count only Free variables (isId) to skip Type, Kind
++    -- variables which have no runtime representation.
++
+ {-
+ Note [Always do CPR w/w]
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+@@ -178,6 +186,30 @@ a disaster, because then the enclosing function might say it has the CPR
+ property, but now doesn't and there a cascade of disaster.  A good example
+ is Trac #5920.
+ 
++Note [Limit w/w arity]
++~~~~~~~~~~~~~~~~~~~~~~~~
++Guard against high worker arity as it generates a lot of stack traffic.
++A simplified example is Trac #11565#comment:6
++
++Current strategy is very simple: don't perform w/w transformation at all
++if the result produces a wrapper with arity higher than -fmax-worker-args=.
++
++It is a bit all or nothing, consider
++
++        f (x,y) (a,b,c,d,e ... , z) = rhs
++
++Currently we will remove all w/w ness entirely. But actually we could
++w/w on the (x,y) pair... it's the huge product that is the problem.
++
++Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd
++solve f. But we can get a lot of args from deeply-nested products:
++
++        g (a, (b, (c, (d, ...)))) = rhs
++
++This is harder to spot on an arg-by-arg basis. Previously mkWwStr was
++given some "fuel" saying how many arguments it could add; when we ran
++out of fuel it would stop w/wing.
++Still not very clever because it had a left-right bias.
+ 
+ ************************************************************************
+ *                                                                      *
diff --git a/dev-lang/ghc/files/ghc-8.0.1_rc1-cgen-constify.patch b/dev-lang/ghc/files/ghc-8.0.1_rc1-cgen-constify.patch
new file mode 100644
index 000000000000..877a5827e4ea
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.1_rc1-cgen-constify.patch
@@ -0,0 +1,34 @@
+commit b0cf3ab7a69b878a4335d21a347b56e4b0ca0b7b
+Author: Sergei Trofimovich <slyfox@gentoo.org>
+Date:   Mon Apr 14 19:06:24 2014 +0300
+
+    compiler/cmm/PprC.hs: constify local string literals
+    
+    Consider one-line module
+        module B (v) where v = "hello"
+    in -fvia-C mode it generates code like
+        static char gibberish_str[] = "hello";
+    
+    It uselessly eats data section (precious resource on ia64!).
+    The patch switches genrator to emit:
+        static const char gibberish_str[] = "hello";
+    
+    Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
+
+diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
+index 2398981..fdb578d 100644
+--- a/compiler/cmm/PprC.hs
++++ b/compiler/cmm/PprC.hs
+@@ -112,6 +112,12 @@ pprTop (CmmProc infos clbl _ graph) =
+ 
+ -- We only handle (a) arrays of word-sized things and (b) strings.
+ 
++pprTop (CmmData (Section ReadOnlyData _) (Statics lbl [CmmString str])) =
++  hcat [
++    pprLocalness lbl, ptext (sLit "const char "), ppr lbl,
++    ptext (sLit "[] = "), pprStringInCStyle str, semi
++  ]
++
+ pprTop (CmmData _section (Statics lbl [CmmString str])) =
+   hcat [
+     pprLocalness lbl, ptext (sLit "char "), ppr lbl,
diff --git a/dev-lang/ghc/files/ghc-8.0.2_rc2-old-sphinx.patch b/dev-lang/ghc/files/ghc-8.0.2_rc2-old-sphinx.patch
new file mode 100644
index 000000000000..a4d49d3ef808
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.2_rc2-old-sphinx.patch
@@ -0,0 +1,12 @@
+diff --git a/docs/users_guide/profiling.rst b/docs/users_guide/profiling.rst
+index 4d0bb3a..f980f72 100644
+--- a/docs/users_guide/profiling.rst
++++ b/docs/users_guide/profiling.rst
+@@ -435,7 +435,2 @@ To generate a heap profile from your program:
+ 
+-For example, here is a heap profile produced for the ``sphere`` program
+-from GHC's ``nofib`` benchmark suite,
+-
+-.. image:: images/prof_scc.*
+-
+ You might also want to take a look at
author	Sergei Trofimovich <slyfox@gentoo.org>	2017-01-16 09:45:51 +0000
committer	Sergei Trofimovich <slyfox@gentoo.org>	2017-01-16 09:50:00 +0000
commit	ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2 (patch)
tree	3d1badf2e0dca0f46173cb7bf791fe8904100757 /dev-lang/ghc/files
parent	media-video/subliminal: amd64 stable wrt bug #605776 (diff)
download	gentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.tar.gz gentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.tar.bz2 gentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.zip