Created
February 26, 2018 09:48
-
-
Save ssorgatem/f71d58d8862934eaa95454799eeff621 to your computer and use it in GitHub Desktop.
GCC optimization patch for linux >4.16
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu | |
| index 8b8d2297d486..3733cbed9201 100644 | |
| --- a/arch/x86/Kconfig.cpu | |
| +++ b/arch/x86/Kconfig.cpu | |
| @@ -150,7 +150,7 @@ config MPENTIUM4 | |
| config MK6 | |
| - bool "K6/K6-II/K6-III" | |
| + bool "AMD K6/K6-II/K6-III" | |
| depends on X86_32 | |
| ---help--- | |
| Select this for an AMD K6-family processor. Enables use of | |
| @@ -158,7 +158,7 @@ config MK6 | |
| flags to GCC. | |
| config MK7 | |
| - bool "Athlon/Duron/K7" | |
| + bool "AMD Athlon/Duron/K7" | |
| depends on X86_32 | |
| ---help--- | |
| Select this for an AMD Athlon K7-family processor. Enables use of | |
| @@ -166,12 +166,55 @@ config MK7 | |
| flags to GCC. | |
| config MK8 | |
| - bool "Opteron/Athlon64/Hammer/K8" | |
| + bool "AMD Opteron/Athlon64/Hammer/K8" | |
| ---help--- | |
| Select this for an AMD Opteron or Athlon64 Hammer-family processor. | |
| Enables use of some extended instructions, and passes appropriate | |
| optimization flags to GCC. | |
| +config MK10 | |
| + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" | |
| + ---help--- | |
| + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, | |
| + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. | |
| + Enables use of some extended instructions, and passes appropriate | |
| + optimization flags to GCC. | |
| + | |
| +config MBARCELONA | |
| + bool "AMD Barcelona" | |
| + ---help--- | |
| + Select this for AMD Barcelona and newer processors. | |
| + | |
| + Enables -march=barcelona | |
| + | |
| +config MBOBCAT | |
| + bool "AMD Bobcat" | |
| + ---help--- | |
| + Select this for AMD Bobcat processors. | |
| + | |
| + Enables -march=btver1 | |
| + | |
| +config MBULLDOZER | |
| + bool "AMD Bulldozer" | |
| + ---help--- | |
| + Select this for AMD Bulldozer processors. | |
| + | |
| + Enables -march=bdver1 | |
| + | |
| +config MPILEDRIVER | |
| + bool "AMD Piledriver" | |
| + ---help--- | |
| + Select this for AMD Piledriver processors. | |
| + | |
| + Enables -march=bdver2 | |
| + | |
| +config MJAGUAR | |
| + bool "AMD Jaguar" | |
| + ---help--- | |
| + Select this for AMD Jaguar processors. | |
| + | |
| + Enables -march=btver2 | |
| + | |
| config MCRUSOE | |
| bool "Crusoe" | |
| depends on X86_32 | |
| @@ -262,8 +305,17 @@ config MPSC | |
| using the cpu family field | |
| in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. | |
| +config MATOM | |
| + bool "Intel Atom" | |
| + ---help--- | |
| + | |
| + Select this for the Intel Atom platform. Intel Atom CPUs have an | |
| + in-order pipelining architecture and thus can benefit from | |
| + accordingly optimized code. Use a recent GCC with specific Atom | |
| + support in order to fully benefit from selecting this option. | |
| + | |
| config MCORE2 | |
| - bool "Core 2/newer Xeon" | |
| + bool "Intel Core 2" | |
| ---help--- | |
| Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and | |
| @@ -271,14 +323,40 @@ config MCORE2 | |
| family in /proc/cpuinfo. Newer ones have 6 and older ones 15 | |
| (not a typo) | |
| -config MATOM | |
| - bool "Intel Atom" | |
| + Enables -march=core2 | |
| + | |
| +config MCOREI7 | |
| + bool "Intel Core i7" | |
| ---help--- | |
| - Select this for the Intel Atom platform. Intel Atom CPUs have an | |
| - in-order pipelining architecture and thus can benefit from | |
| - accordingly optimized code. Use a recent GCC with specific Atom | |
| - support in order to fully benefit from selecting this option. | |
| + Select this for the Intel Nehalem platform. Intel Nehalem processors | |
| + include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors. | |
| + | |
| + Enables -march=corei7 | |
| + | |
| +config MCOREI7AVX | |
| + bool "Intel Core 2nd Gen AVX" | |
| + ---help--- | |
| + | |
| + Select this for 2nd Gen Core processors including Sandy Bridge. | |
| + | |
| + Enables -march=corei7-avx | |
| + | |
| +config MCOREAVXI | |
| + bool "Intel Core 3rd Gen AVX" | |
| + ---help--- | |
| + | |
| + Select this for 3rd Gen Core processors including Ivy Bridge. | |
| + | |
| + Enables -march=core-avx-i | |
| + | |
| +config MCOREAVX2 | |
| + bool "Intel Core AVX2" | |
| + ---help--- | |
| + | |
| + Select this for AVX2 enabled processors including Haswell. | |
| + | |
| + Enables -march=core-avx2 | |
| config GENERIC_CPU | |
| bool "Generic-x86-64" | |
| @@ -287,6 +365,19 @@ config GENERIC_CPU | |
| Generic x86-64 CPU. | |
| Run equally well on all x86-64 CPUs. | |
| +config MNATIVE | |
| + bool "Native optimizations autodetected by GCC" | |
| + ---help--- | |
| + | |
| + GCC 4.2 and above support -march=native, which automatically detects | |
| + the optimum settings to use based on your processor. -march=native | |
| + also detects and applies additional settings beyond -march specific | |
| + to your CPU (e.g. -msse4). Unless you have a specific reason not to | |
| + (e.g. distcc cross-compiling), you should probably be using | |
| + -march=native rather than any of the other options in this list. | |
| + | |
| + Enables -march=native | |
| + | |
| endchoice | |
| config X86_GENERIC | |
| @@ -311,7 +402,7 @@ config X86_INTERNODE_CACHE_SHIFT | |
| config X86_L1_CACHE_SHIFT | |
| int | |
| default "7" if MPENTIUM4 || MPSC | |
| - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU | |
| + default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU | |
| default "4" if MELAN || M486 || MGEODEGX1 | |
| default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | |
| @@ -342,11 +433,11 @@ config X86_ALIGNMENT_16 | |
| config X86_INTEL_USERCOPY | |
| def_bool y | |
| - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 | |
| + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 | |
| config X86_USE_PPRO_CHECKSUM | |
| def_bool y | |
| - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM | |
| + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE | |
| config X86_USE_3DNOW | |
| def_bool y | |
| @@ -370,17 +461,17 @@ config X86_P6_NOP | |
| config X86_TSC | |
| def_bool y | |
| - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 | |
| + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM) || X86_64 || MNATIVE | |
| config X86_CMPXCHG64 | |
| def_bool y | |
| - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 | |
| + depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 || MNATIVE | |
| # this should be set for all -march=.. options where the compiler | |
| # generates cmov. | |
| config X86_CMOV | |
| def_bool y | |
| - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) | |
| + depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) | |
| config X86_MINIMUM_CPU_FAMILY | |
| int | |
| diff --git a/arch/x86/Makefile b/arch/x86/Makefile | |
| index fad55160dcb9..0fa14514c1ab 100644 | |
| --- a/arch/x86/Makefile | |
| +++ b/arch/x86/Makefile | |
| @@ -124,11 +124,26 @@ else | |
| KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) | |
| # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) | |
| + cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) | |
| cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) | |
| + cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10) | |
| + cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) | |
| + cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) | |
| + cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) | |
| + cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) | |
| + cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) | |
| cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) | |
| cflags-$(CONFIG_MCORE2) += \ | |
| - $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) | |
| + $(call cc-option,-march=core2,$(call cc-option,-mtune=core2)) | |
| + cflags-$(CONFIG_MCOREI7) += \ | |
| + $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7)) | |
| + cflags-$(CONFIG_MCOREI7AVX) += \ | |
| + $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx)) | |
| + cflags-$(CONFIG_MCOREAVXI) += \ | |
| + $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i)) | |
| + cflags-$(CONFIG_MCOREAVX2) += \ | |
| + $(call cc-option,-march=core-avx2,$(call cc-option,-mtune=core-avx2)) | |
| cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ | |
| $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) | |
| cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) | |
| diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu | |
| index 1f5faf8606b4..10dae99f07a5 100644 | |
| --- a/arch/x86/Makefile_32.cpu | |
| +++ b/arch/x86/Makefile_32.cpu | |
| @@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6) += -march=k6 | |
| # Please note, that patches that add -march=athlon-xp and friends are pointless. | |
| # They make zero difference whatsosever to performance at this time. | |
| cflags-$(CONFIG_MK7) += -march=athlon | |
| +cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) | |
| cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) | |
| +cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon) | |
| +cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon) | |
| +cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon) | |
| +cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon) | |
| +cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon) | |
| +cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon) | |
| cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0 | |
| cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0 | |
| cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) | |
| @@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) -falign-fu | |
| cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) | |
| cflags-$(CONFIG_MVIAC7) += -march=i686 | |
| cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) | |
| +cflags-$(CONFIG_MCOREI7) += -march=i686 $(call tune,corei7) | |
| +cflags-$(CONFIG_MCOREI7AVX) += -march=i686 $(call tune,corei7-avx) | |
| +cflags-$(CONFIG_MCOREAVXI) += -march=i686 $(call tune,core-avx-i) | |
| +cflags-$(CONFIG_MCOREAVX2) += -march=i686 $(call tune,core-avx2) | |
| cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ | |
| $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) | |
| diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h | |
| index 7948a17febb4..1b88e0cfd324 100644 | |
| --- a/arch/x86/include/asm/module.h | |
| +++ b/arch/x86/include/asm/module.h | |
| @@ -25,6 +25,16 @@ struct mod_arch_specific { | |
| #define MODULE_PROC_FAMILY "586MMX " | |
| #elif defined CONFIG_MCORE2 | |
| #define MODULE_PROC_FAMILY "CORE2 " | |
| +#elif defined CONFIG_MNATIVE | |
| +#define MODULE_PROC_FAMILY "NATIVE " | |
| +#elif defined CONFIG_MCOREI7 | |
| +#define MODULE_PROC_FAMILY "COREI7 " | |
| +#elif defined CONFIG_MCOREI7AVX | |
| +#define MODULE_PROC_FAMILY "COREI7AVX " | |
| +#elif defined CONFIG_MCOREAVXI | |
| +#define MODULE_PROC_FAMILY "COREAVXI " | |
| +#elif defined CONFIG_MCOREAVX2 | |
| +#define MODULE_PROC_FAMILY "COREAVX2 " | |
| #elif defined CONFIG_MATOM | |
| #define MODULE_PROC_FAMILY "ATOM " | |
| #elif defined CONFIG_M686 | |
| @@ -43,6 +53,18 @@ struct mod_arch_specific { | |
| #define MODULE_PROC_FAMILY "K7 " | |
| #elif defined CONFIG_MK8 | |
| #define MODULE_PROC_FAMILY "K8 " | |
| +#elif defined CONFIG_MK10 | |
| +#define MODULE_PROC_FAMILY "K10 " | |
| +#elif defined CONFIG_MBARCELONA | |
| +#define MODULE_PROC_FAMILY "BARCELONA " | |
| +#elif defined CONFIG_MBOBCAT | |
| +#define MODULE_PROC_FAMILY "BOBCAT " | |
| +#elif defined CONFIG_MBULLDOZER | |
| +#define MODULE_PROC_FAMILY "BULLDOZER " | |
| +#elif defined CONFIG_MPILEDRIVER | |
| +#define MODULE_PROC_FAMILY "PILEDRIVER " | |
| +#elif defined CONFIG_MJAGUAR | |
| +#define MODULE_PROC_FAMILY "JAGUAR " | |
| #elif defined CONFIG_MELAN | |
| #define MODULE_PROC_FAMILY "ELAN " | |
| #elif defined CONFIG_MCRUSOE | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment