summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Trofimovich <slyfox@gentoo.org>2017-01-16 09:45:51 +0000
committerSergei Trofimovich <slyfox@gentoo.org>2017-01-16 09:50:00 +0000
commitef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2 (patch)
tree3d1badf2e0dca0f46173cb7bf791fe8904100757 /dev-lang/ghc/files
parentmedia-video/subliminal: amd64 stable wrt bug #605776 (diff)
downloadgentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.tar.gz
gentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.tar.bz2
gentoo-ef416f3d1295dfc8fb4a0638a3c5f0cab4f9bab2.zip
dev-lang/ghc: bump up to 8.0.2, no KEYWORDS or binaries yet
Package-Manager: Portage-2.3.3, Repoman-2.3.1
Diffstat (limited to 'dev-lang/ghc/files')
-rw-r--r--dev-lang/ghc/files/ghc-8.0.1-limit-jN.patch50
-rw-r--r--dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch65
-rw-r--r--dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch127
-rw-r--r--dev-lang/ghc/files/ghc-8.0.1_rc1-cgen-constify.patch34
-rw-r--r--dev-lang/ghc/files/ghc-8.0.2_rc2-old-sphinx.patch12
5 files changed, 288 insertions, 0 deletions
diff --git a/dev-lang/ghc/files/ghc-8.0.1-limit-jN.patch b/dev-lang/ghc/files/ghc-8.0.1-limit-jN.patch
new file mode 100644
index 000000000000..dc5410da3020
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.1-limit-jN.patch
@@ -0,0 +1,50 @@
+commit 501e05bb1b8974fc8b6c9eee86c87c367e87a211
+Author: Sergei Trofimovich <slyfox@gentoo.org>
+Date: Tue Aug 30 12:10:47 2016 +0100
+
+ GhcMake: limit Capability count to CPU count in parallel mode
+
+ In Trac #9221 one of problems using high --jobs=<N>
+ is amount of mutator (or GC) threads we crate.
+
+ We use userspace spinning-and-yielding (see ACQUIRE_SPIN_LOCK)
+ to acess work stealing queues. In case of
+ N-worker-threads > N-CPUs fraction of time when
+ thread holding spin lock gets descheduled by kernel
+ increases. That causes other threads to waste CPU time
+ before giving up CPU.
+
+ Signed-off-by: Sergei Trofimovich <siarheit@google.com>
+
+ Test Plan:
+ ghc --make -j8 and -j80 have comparable sys time
+ on a 8-core system.
+
+ Reviewers: austin, gintas, bgamari, simonmar
+
+ Reviewed By: bgamari, simonmar
+
+ Subscribers: thomie
+
+ Differential Revision: https://phabricator.haskell.org/D2482
+
+ GHC Trac Issues: #9221
+
+diff --git a/compiler/main/GhcMake.hs b/compiler/main/GhcMake.hs
+index 9dc43cd..905df63 100644
+--- a/compiler/main/GhcMake.hs
++++ b/compiler/main/GhcMake.hs
+@@ -761,7 +761,12 @@ parUpsweep n_jobs old_hpt stable_mods cleanup sccs = do
+
+ let updNumCapabilities = liftIO $ do
+ n_capabilities <- getNumCapabilities
+- unless (n_capabilities /= 1) $ setNumCapabilities n_jobs
++ n_cpus <- getNumProcessors
++ -- Setting number of capabilities more than
++ -- CPU count usually leads to high userspace
++ -- lock contention. Trac #9221
++ let n_caps = min n_jobs n_cpus
++ unless (n_capabilities /= 1) $ setNumCapabilities n_caps
+ return n_capabilities
+ -- Reset the number of capabilities once the upsweep ends.
+ let resetNumCapabilities orig_n = liftIO $ setNumCapabilities orig_n
diff --git a/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch b/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch
new file mode 100644
index 000000000000..b46e57301782
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch
@@ -0,0 +1,65 @@
+commit bdfc5375f219d6def81effda4e57cb56d01fc917
+Author: Sergei Trofimovich <slyfox@gentoo.org>
+Date: Tue Aug 30 12:10:54 2016 +0100
+
+ rts: enable parallel GC scan of large (32M+) allocation area
+
+ Parallel GC does not scan large allocation area (-A)
+ effectively as it does not do work stealing from nursery
+ by default.
+
+ That leads to large imbalance when only one of threads
+ overflows allocation area: most of GC threads finish
+ quickly (as there is not much to collect) and sit idle
+ waiting while single GC thread finishes scan of single
+ allocation area for that thread.
+
+ The patch enables work stealing for (equivalent of -qb0)
+ allocation area of -A32M or higher.
+
+ Tested on a highlighting-kate package from Trac #9221
+
+ On 8-core machine the difference is around 5% faster
+ of wall-clock time. On 24-core VM the speedup is 20%.
+
+ Signed-off-by: Sergei Trofimovich <siarheit@google.com>
+
+ Test Plan: measured wall time and GC parallelism on highlighting-kate build
+
+ Reviewers: austin, bgamari, erikd, simonmar
+
+ Reviewed By: bgamari, simonmar
+
+ Subscribers: thomie
+
+ Differential Revision: https://phabricator.haskell.org/D2483
+
+ GHC Trac Issues: #9221
+
+diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
+index fda33f0..7a719b9 100644
+--- a/rts/RtsFlags.c
++++ b/rts/RtsFlags.c
+@@ -237,1 +237,1 @@ void initRtsFlagsDefaults(void)
+- RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
++ RtsFlags.ParFlags.parGcLoadBalancingGen = ~0u; /* auto, based on -A */
+@@ -1398,2 +1390,19 @@ static void normaliseRtsOpts (void)
+ }
+
++#ifdef THREADED_RTS
++ if (RtsFlags.ParFlags.parGcLoadBalancingGen == ~0u) {
++ StgWord alloc_area_bytes
++ = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE;
++
++ // If allocation area is larger that CPU cache
++ // we can finish scanning quicker doing work-stealing
++ // scan. Trac #9221
++ // 32M looks big enough not to fit into L2 cache
++ // of popular modern CPUs.
++ if (alloc_area_bytes >= 32 * 1024 * 1024) {
++ RtsFlags.ParFlags.parGcLoadBalancingGen = 0;
++ } else {
++ RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
++ }
++ }
++#endif
diff --git a/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch b/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch
new file mode 100644
index 000000000000..4752f4482678
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch
@@ -0,0 +1,127 @@
+commit 5efbf0d243984444cf352ad6f0d147e226c64498
+Author: Sergei Trofimovich <slyfox@gentoo.org>
+Date: Thu Sep 1 17:34:58 2016 +0100
+
+ restore -fmax-worker-args handling (Trac #11565)
+
+ maxWorkerArgs handling was accidentally lost 3 years ago
+ in a major update of demand analysis
+ commit 0831a12ea2fc73c33652eeec1adc79fa19700578
+
+ Old regression is noticeable as:
+ - code bloat (requires stack reshuffling)
+ - compilation slowdown (more code to optimise/generate)
+ - and increased heap usage (DynFlags unboxing/reboxing?)
+
+ On a simple compile benchmark this change causes heap
+ allocation drop from 70G don to 67G (ghc perf build).
+
+ Signed-off-by: Sergei Trofimovich <siarheit@google.com>
+
+ Reviewers: simonpj, ezyang, goldfire, austin, bgamari
+
+ Reviewed By: simonpj, ezyang
+
+ Subscribers: thomie
+
+ Differential Revision: https://phabricator.haskell.org/D2503
+
+ GHC Trac Issues: #11565
+
+diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
+index 10d5614..7166f57 100644
+--- a/compiler/specialise/SpecConstr.hs
++++ b/compiler/specialise/SpecConstr.hs
+@@ -29,7 +29,7 @@ import CoreFVs ( exprsFreeVarsList )
+ import CoreMonad
+ import Literal ( litIsLifted )
+ import HscTypes ( ModGuts(..) )
+-import WwLib ( mkWorkerArgs )
++import WwLib ( isWorkerSmallEnough, mkWorkerArgs )
+ import DataCon
+ import Coercion hiding( substCo )
+ import Rules
+@@ -1533,10 +1533,14 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
+
+ | Just all_calls <- lookupVarEnv bind_calls fn
+ = -- pprTrace "specialise entry {" (ppr fn <+> ppr (length all_calls)) $
+- do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls
+-
++ do { (boring_call, all_pats) <- callsToPats env specs arg_occs all_calls
+ -- Bale out if too many specialisations
+- ; let n_pats = length pats
++ ; let pats = filter (is_small_enough . fst) all_pats
++ is_small_enough vars = isWorkerSmallEnough (sc_dflags env) vars
++ -- We are about to construct w/w pair in 'spec_one'.
++ -- Omit specialisation leading to high arity workers.
++ -- See Note [Limit w/w arity]
++ n_pats = length pats
+ spec_count' = n_pats + spec_count
+ ; case sc_count env of
+ Just max | not (sc_force env) && spec_count' > max
+diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs
+index 09bc204..d9460d9 100644
+--- a/compiler/stranal/WwLib.hs
++++ b/compiler/stranal/WwLib.hs
+@@ -8,6 +8,7 @@
+
+ module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs
+ , deepSplitProductType_maybe, findTypeShape
++ , isWorkerSmallEnough
+ ) where
+
+ #include "HsVersions.h"
+@@ -144,7 +145,8 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots
+ wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var
+ worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args
+
+- ; if useful1 && not (only_one_void_argument) || useful2
++ ; if isWorkerSmallEnough dflags work_args
++ && (useful1 && not only_one_void_argument || useful2)
+ then return (Just (worker_args_dmds, wrapper_body, worker_body))
+ else return Nothing
+ }
+@@ -165,6 +167,12 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots
+ | otherwise
+ = False
+
++-- See Note [Limit w/w arity]
++isWorkerSmallEnough :: DynFlags -> [Var] -> Bool
++isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags
++ -- We count only Free variables (isId) to skip Type, Kind
++ -- variables which have no runtime representation.
++
+ {-
+ Note [Always do CPR w/w]
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+@@ -178,6 +186,30 @@ a disaster, because then the enclosing function might say it has the CPR
+ property, but now doesn't and there a cascade of disaster. A good example
+ is Trac #5920.
+
++Note [Limit w/w arity]
++~~~~~~~~~~~~~~~~~~~~~~~~
++Guard against high worker arity as it generates a lot of stack traffic.
++A simplified example is Trac #11565#comment:6
++
++Current strategy is very simple: don't perform w/w transformation at all
++if the result produces a wrapper with arity higher than -fmax-worker-args=.
++
++It is a bit all or nothing, consider
++
++ f (x,y) (a,b,c,d,e ... , z) = rhs
++
++Currently we will remove all w/w ness entirely. But actually we could
++w/w on the (x,y) pair... it's the huge product that is the problem.
++
++Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd
++solve f. But we can get a lot of args from deeply-nested products:
++
++ g (a, (b, (c, (d, ...)))) = rhs
++
++This is harder to spot on an arg-by-arg basis. Previously mkWwStr was
++given some "fuel" saying how many arguments it could add; when we ran
++out of fuel it would stop w/wing.
++Still not very clever because it had a left-right bias.
+
+ ************************************************************************
+ * *
diff --git a/dev-lang/ghc/files/ghc-8.0.1_rc1-cgen-constify.patch b/dev-lang/ghc/files/ghc-8.0.1_rc1-cgen-constify.patch
new file mode 100644
index 000000000000..877a5827e4ea
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.1_rc1-cgen-constify.patch
@@ -0,0 +1,34 @@
+commit b0cf3ab7a69b878a4335d21a347b56e4b0ca0b7b
+Author: Sergei Trofimovich <slyfox@gentoo.org>
+Date: Mon Apr 14 19:06:24 2014 +0300
+
+ compiler/cmm/PprC.hs: constify local string literals
+
+ Consider one-line module
+ module B (v) where v = "hello"
+ in -fvia-C mode it generates code like
+ static char gibberish_str[] = "hello";
+
+ It uselessly eats data section (precious resource on ia64!).
+ The patch switches genrator to emit:
+ static const char gibberish_str[] = "hello";
+
+ Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
+
+diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
+index 2398981..fdb578d 100644
+--- a/compiler/cmm/PprC.hs
++++ b/compiler/cmm/PprC.hs
+@@ -112,6 +112,12 @@ pprTop (CmmProc infos clbl _ graph) =
+
+ -- We only handle (a) arrays of word-sized things and (b) strings.
+
++pprTop (CmmData (Section ReadOnlyData _) (Statics lbl [CmmString str])) =
++ hcat [
++ pprLocalness lbl, ptext (sLit "const char "), ppr lbl,
++ ptext (sLit "[] = "), pprStringInCStyle str, semi
++ ]
++
+ pprTop (CmmData _section (Statics lbl [CmmString str])) =
+ hcat [
+ pprLocalness lbl, ptext (sLit "char "), ppr lbl,
diff --git a/dev-lang/ghc/files/ghc-8.0.2_rc2-old-sphinx.patch b/dev-lang/ghc/files/ghc-8.0.2_rc2-old-sphinx.patch
new file mode 100644
index 000000000000..a4d49d3ef808
--- /dev/null
+++ b/dev-lang/ghc/files/ghc-8.0.2_rc2-old-sphinx.patch
@@ -0,0 +1,12 @@
+diff --git a/docs/users_guide/profiling.rst b/docs/users_guide/profiling.rst
+index 4d0bb3a..f980f72 100644
+--- a/docs/users_guide/profiling.rst
++++ b/docs/users_guide/profiling.rst
+@@ -435,7 +435,2 @@ To generate a heap profile from your program:
+
+-For example, here is a heap profile produced for the ``sphere`` program
+-from GHC's ``nofib`` benchmark suite,
+-
+-.. image:: images/prof_scc.*
+-
+ You might also want to take a look at