kernel-release 5.8.0-1 (aarch64;znver1;x86_64) 2020-12096

Status: published
Submitter: benbullard79 [@T] cox.net
Platform: rolling
Repository: main
URL: https://abf.openmandriva.org/build_lists/813800
Packages:
bpftool-5.8.0-1.aarch64.binary
cross-aarch64-openmandriva-linux-android-kernel-release-headers-5.8.0-1.aarch64.binary
cross-aarch64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.aarch64.binary
cross-aarch64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.aarch64.binary
cross-armv7hnl-openmandriva-linux-gnueabihf-kernel-release-headers-5.8.0-1.aarch64.binary
cross-armv7hnl-openmandriva-linux-musleabihf-kernel-release-headers-5.8.0-1.aarch64.binary
cross-armv7hnl-openmandriva-linux-uclibceabihf-kernel-release-headers-5.8.0-1.aarch64.binary
cross-armv7l-openmandriva-linux-androideabi-kernel-release-headers-5.8.0-1.aarch64.binary
cross-armv8l-openmandriva-linux-androideabi-kernel-release-headers-5.8.0-1.aarch64.binary
cross-i686-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.aarch64.binary
cross-i686-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.aarch64.binary
cross-i686-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.aarch64.binary
cross-ppc64le-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.aarch64.binary
cross-ppc64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.aarch64.binary
cross-riscv32-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.aarch64.binary
cross-riscv32-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.aarch64.binary
cross-riscv32-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.aarch64.binary
cross-riscv64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.aarch64.binary
cross-riscv64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.aarch64.binary
cross-riscv64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.aarch64.binary
cross-x86_64-openmandriva-linux-android-kernel-release-headers-5.8.0-1.aarch64.binary
cross-x86_64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.aarch64.binary
cross-x86_64-openmandriva-linux-gnux32-kernel-release-headers-5.8.0-1.aarch64.binary
cross-x86_64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.aarch64.binary
cross-x86_64-openmandriva-linux-muslx32-kernel-release-headers-5.8.0-1.aarch64.binary
cross-x86_64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.aarch64.binary
cross-x86_64-openmandriva-linux-uclibcx32-kernel-release-headers-5.8.0-1.aarch64.binary
kernel-release-5.8.0-1.aarch64.source
kernel-release-headers-1:5.8.0-1.aarch64.binary
kernel-release-server-5.8.0-1.aarch64.binary
kernel-release-server-devel-5.8.0-1.aarch64.binary
kernel-release-source-5.8.0-1.aarch64.binary
lib64bpf0-5.8.0-1.aarch64.binary
lib64bpf-devel-5.8.0-1.aarch64.binary
bpftool-5.8.0-1.znver1.binary
cpupower-5.8.0-1.znver1.binary
cpupower-devel-5.8.0-1.znver1.binary
cross-aarch64-openmandriva-linux-android-kernel-release-headers-5.8.0-1.znver1.binary
cross-aarch64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.znver1.binary
cross-aarch64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.znver1.binary
cross-aarch64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.znver1.binary
cross-armv7hnl-openmandriva-linux-gnueabihf-kernel-release-headers-5.8.0-1.znver1.binary
cross-armv7hnl-openmandriva-linux-musleabihf-kernel-release-headers-5.8.0-1.znver1.binary
cross-armv7hnl-openmandriva-linux-uclibceabihf-kernel-release-headers-5.8.0-1.znver1.binary
cross-armv7l-openmandriva-linux-androideabi-kernel-release-headers-5.8.0-1.znver1.binary
cross-armv8l-openmandriva-linux-androideabi-kernel-release-headers-5.8.0-1.znver1.binary
cross-i686-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.znver1.binary
cross-i686-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.znver1.binary
cross-i686-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.znver1.binary
cross-ppc64le-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.znver1.binary
cross-ppc64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.znver1.binary
cross-riscv32-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.znver1.binary
cross-riscv32-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.znver1.binary
cross-riscv32-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.znver1.binary
cross-riscv64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.znver1.binary
cross-riscv64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.znver1.binary
cross-riscv64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.znver1.binary
cross-x86_64-openmandriva-linux-android-kernel-release-headers-5.8.0-1.znver1.binary
cross-x86_64-openmandriva-linux-gnux32-kernel-release-headers-5.8.0-1.znver1.binary
cross-x86_64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.znver1.binary
cross-x86_64-openmandriva-linux-muslx32-kernel-release-headers-5.8.0-1.znver1.binary
cross-x86_64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.znver1.binary
cross-x86_64-openmandriva-linux-uclibcx32-kernel-release-headers-5.8.0-1.znver1.binary
kernel-release-5.8.0-1.znver1.source
kernel-release-desktop-5.8.0-1.znver1.binary
kernel-release-desktop-devel-5.8.0-1.znver1.binary
kernel-release-headers-1:5.8.0-1.znver1.binary
kernel-release-server-5.8.0-1.znver1.binary
kernel-release-server-devel-5.8.0-1.znver1.binary
kernel-release-source-5.8.0-1.znver1.binary
lib64bpf0-5.8.0-1.znver1.binary
lib64bpf-devel-5.8.0-1.znver1.binary
turbostat-5.8.0-1.znver1.binary
x86_energy_perf_policy-5.8.0-1.znver1.binary
bpftool-5.8.0-1.x86_64.binary
cpupower-5.8.0-1.x86_64.binary
cpupower-devel-5.8.0-1.x86_64.binary
cross-aarch64-openmandriva-linux-android-kernel-release-headers-5.8.0-1.x86_64.binary
cross-aarch64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.x86_64.binary
cross-aarch64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.x86_64.binary
cross-aarch64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.x86_64.binary
cross-armv7hnl-openmandriva-linux-gnueabihf-kernel-release-headers-5.8.0-1.x86_64.binary
cross-armv7hnl-openmandriva-linux-musleabihf-kernel-release-headers-5.8.0-1.x86_64.binary
cross-armv7hnl-openmandriva-linux-uclibceabihf-kernel-release-headers-5.8.0-1.x86_64.binary
cross-armv7l-openmandriva-linux-androideabi-kernel-release-headers-5.8.0-1.x86_64.binary
cross-armv8l-openmandriva-linux-androideabi-kernel-release-headers-5.8.0-1.x86_64.binary
cross-i686-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.x86_64.binary
cross-i686-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.x86_64.binary
cross-i686-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.x86_64.binary
cross-ppc64le-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.x86_64.binary
cross-ppc64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.x86_64.binary
cross-riscv32-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.x86_64.binary
cross-riscv32-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.x86_64.binary
cross-riscv32-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.x86_64.binary
cross-riscv64-openmandriva-linux-gnu-kernel-release-headers-5.8.0-1.x86_64.binary
cross-riscv64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.x86_64.binary
cross-riscv64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.x86_64.binary
cross-x86_64-openmandriva-linux-android-kernel-release-headers-5.8.0-1.x86_64.binary
cross-x86_64-openmandriva-linux-gnux32-kernel-release-headers-5.8.0-1.x86_64.binary
cross-x86_64-openmandriva-linux-musl-kernel-release-headers-5.8.0-1.x86_64.binary
cross-x86_64-openmandriva-linux-muslx32-kernel-release-headers-5.8.0-1.x86_64.binary
cross-x86_64-openmandriva-linux-uclibc-kernel-release-headers-5.8.0-1.x86_64.binary
cross-x86_64-openmandriva-linux-uclibcx32-kernel-release-headers-5.8.0-1.x86_64.binary
kernel-release-5.8.0-1.x86_64.source
kernel-release-desktop-5.8.0-1.x86_64.binary
kernel-release-desktop-devel-5.8.0-1.x86_64.binary
kernel-release-headers-1:5.8.0-1.x86_64.binary
kernel-release-server-5.8.0-1.x86_64.binary
kernel-release-server-devel-5.8.0-1.x86_64.binary
kernel-release-source-5.8.0-1.x86_64.binary
lib64bpf0-5.8.0-1.x86_64.binary
lib64bpf-devel-5.8.0-1.x86_64.binary
turbostat-5.8.0-1.x86_64.binary
x86_energy_perf_policy-5.8.0-1.x86_64.binary
Build Date: 2020-08-09 00:15:44 +0000 UTC
Last Updated: 2020-08-13 01:35:12.235117517 +0000 UTC
$ git diff --patch-with-stat --summary 8b71359564ed514682b6057824394ec385a64627..2d8067d4b2595be29ef8352276513865132a2271

 .abf.yml                                           |    4 +-
 0001-cpu-5.2-merge-graysky-s-patchset.patch        |   75 +-
 ...ble-building-ashmem-and-binder-as-modules.patch |   38 +-
 export-symbols-needed-by-android-drivers.patch     |  114 +-
 fix_virtualbox.patch                               |   20 +
 fixes_for_changes_in_cpu_tlbstate.patch            |   25 +
 fixes_for_mm_struct.patch                          |  161 +
 fixes_for_module_memory.patch                      |   51 +
 ...l-5.7-fewer-conditions-for-ARM64_PTR_AUTH.patch |   12 +-
 kernel-release.spec                                |   79 +-
 linux-5.8.tar.sign                                 |   19 +
 nvme-pci-more-info.patch                           |   12 +-
 powerpc-common.config                              |    1 +
 ...ernel-and-ramfs-comp-and-decomp-with-zstd.patch |  182 +-
 uksm-5.8.patch                                     | 6934 ++++++++++++++++++++
 znver1-common.config                               |    2 +
 16 files changed, 7472 insertions(+), 257 deletions(-)
 create mode 100644 fix_virtualbox.patch
 create mode 100644 fixes_for_changes_in_cpu_tlbstate.patch
 create mode 100644 fixes_for_mm_struct.patch
 create mode 100644 fixes_for_module_memory.patch
 create mode 100644 linux-5.8.tar.sign
 create mode 100644 powerpc-common.config
 create mode 100644 uksm-5.8.patch

diff --git a/.abf.yml b/.abf.yml
index cef6999..0d2bfc7 100644
--- a/.abf.yml
+++ b/.abf.yml
@@ -1,6 +1,4 @@
 sources:
   extra-wifi-drivers-20200301.tar.zst: 3390c738c7d91250714ce0f88d26371e93bc40b8
   saa716x-driver.tar.xz: f9b6ef1cd6f1f71f53d9a8aadfba2cf6b5c3d7b6
-  linux-5.7.tar.xz: 07e40057b78f1c9dd2b042056325d99fcf9f8a08
-  linux-5.7.tar.sign: 9bdcc9e48ee0eca0baa8118b9a10ba2eda64363e
-  patch-5.7.11.xz: 22e11d00f94d641f0167db08f61a8feb2f231a25
+  linux-5.8.tar.xz: 1f52755a5128c1a6719b3294b73c59aa9597937e
diff --git a/0001-cpu-5.2-merge-graysky-s-patchset.patch b/0001-cpu-5.2-merge-graysky-s-patchset.patch
index ad3c196..4346965 100644
--- a/0001-cpu-5.2-merge-graysky-s-patchset.patch
+++ b/0001-cpu-5.2-merge-graysky-s-patchset.patch
@@ -1,7 +1,7 @@
-diff -up linux-5.5-rc1/arch/x86/include/asm/vermagic.h.17~ linux-5.5-rc1/arch/x86/include/asm/vermagic.h
---- linux-5.5-rc1/arch/x86/include/asm/vermagic.h.17~	2019-12-08 23:57:55.000000000 +0100
-+++ linux-5.5-rc1/arch/x86/include/asm/vermagic.h	2019-12-10 17:00:36.851313621 +0100
-@@ -27,6 +27,30 @@ struct mod_arch_specific {
+diff -up linux-5.8-rc1/arch/x86/include/asm/vermagic.h.19~ linux-5.8-rc1/arch/x86/include/asm/vermagic.h
+--- linux-5.8-rc1/arch/x86/include/asm/vermagic.h.19~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/include/asm/vermagic.h	2020-06-16 21:52:41.132824421 +0200
+@@ -17,6 +17,30 @@
  #define MODULE_PROC_FAMILY "586MMX "
  #elif defined CONFIG_MCORE2
  #define MODULE_PROC_FAMILY "CORE2 "
@@ -32,7 +32,7 @@ diff -up linux-5.5-rc1/arch/x86/include/asm/vermagic.h.17~ linux-5.5-rc1/arch/x8
  #elif defined CONFIG_MATOM
  #define MODULE_PROC_FAMILY "ATOM "
  #elif defined CONFIG_M686
-@@ -45,6 +69,26 @@ struct mod_arch_specific {
+@@ -35,6 +59,26 @@
  #define MODULE_PROC_FAMILY "K7 "
  #elif defined CONFIG_MK8
  #define MODULE_PROC_FAMILY "K8 "
@@ -59,44 +59,42 @@ diff -up linux-5.5-rc1/arch/x86/include/asm/vermagic.h.17~ linux-5.5-rc1/arch/x8
  #elif defined CONFIG_MELAN
  #define MODULE_PROC_FAMILY "ELAN "
  #elif defined CONFIG_MCRUSOE
-diff -up linux-5.5-rc1/arch/x86/Kconfig.cpu.17~ linux-5.5-rc1/arch/x86/Kconfig.cpu
---- linux-5.5-rc1/arch/x86/Kconfig.cpu.17~	2019-12-08 23:57:55.000000000 +0100
-+++ linux-5.5-rc1/arch/x86/Kconfig.cpu	2019-12-10 17:02:11.581317798 +0100
+diff -up linux-5.8-rc1/arch/x86/Kconfig.cpu.19~ linux-5.8-rc1/arch/x86/Kconfig.cpu
+--- linux-5.8-rc1/arch/x86/Kconfig.cpu.19~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/Kconfig.cpu	2020-06-16 22:03:50.575913298 +0200
 @@ -123,6 +123,7 @@ config MPENTIUMM
  config MPENTIUM4
  	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
  	depends on X86_32
 +	select X86_P6_NOP
- 	---help---
+ 	help
  	  Select this for Intel Pentium 4 chips.  This includes the
  	  Pentium 4, Pentium D, P4-based Celeron and Xeon, and
-@@ -155,9 +156,8 @@ config MPENTIUM4
- 		-Paxville
- 		-Dempsey
+@@ -157,7 +158,7 @@ config MPENTIUM4
+ 
  
--
  config MK6
 -	bool "K6/K6-II/K6-III"
 +	bool "AMD K6/K6-II/K6-III"
  	depends on X86_32
- 	---help---
+ 	help
  	  Select this for an AMD K6-family processor.  Enables use of
-@@ -165,7 +165,7 @@ config MK6
+@@ -165,7 +166,7 @@ config MK6
  	  flags to GCC.
  
  config MK7
 -	bool "Athlon/Duron/K7"
 +	bool "AMD Athlon/Duron/K7"
  	depends on X86_32
- 	---help---
+ 	help
  	  Select this for an AMD Athlon K7-family processor.  Enables use of
-@@ -173,12 +173,83 @@ config MK7
+@@ -173,12 +174,83 @@ config MK7
  	  flags to GCC.
  
  config MK8
 -	bool "Opteron/Athlon64/Hammer/K8"
 +	bool "AMD Opteron/Athlon64/Hammer/K8"
- 	---help---
+ 	help
  	  Select this for an AMD Opteron or Athlon64 Hammer-family processor.
  	  Enables use of some extended instructions, and passes appropriate
  	  optimization flags to GCC.
@@ -175,15 +173,15 @@ diff -up linux-5.5-rc1/arch/x86/Kconfig.cpu.17~ linux-5.5-rc1/arch/x86/Kconfig.c
  config MCRUSOE
  	bool "Crusoe"
  	depends on X86_32
-@@ -260,6 +331,7 @@ config MVIAC7
+@@ -260,6 +332,7 @@ config MVIAC7
  
  config MPSC
  	bool "Intel P4 / older Netburst based Xeon"
 +	select X86_P6_NOP
  	depends on X86_64
- 	---help---
+ 	help
  	  Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
-@@ -269,8 +341,19 @@ config MPSC
+@@ -269,8 +342,19 @@ config MPSC
  	  using the cpu family field
  	  in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
  
@@ -201,26 +199,27 @@ diff -up linux-5.5-rc1/arch/x86/Kconfig.cpu.17~ linux-5.5-rc1/arch/x86/Kconfig.c
 -	bool "Core 2/newer Xeon"
 +	bool "Intel Core 2"
 +	select X86_P6_NOP
- 	---help---
+ 	help
  
  	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
-@@ -278,14 +361,106 @@ config MCORE2
+@@ -278,14 +362,106 @@ config MCORE2
  	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
  	  (not a typo)
  
 -config MATOM
 -	bool "Intel Atom"
+-	help
 +	  Enables -march=core2
-+
-+config MNEHALEM
-+	bool "Intel Nehalem"
-+	select X86_P6_NOP
- 	---help---
  
 -	  Select this for the Intel Atom platform. Intel Atom CPUs have an
 -	  in-order pipelining architecture and thus can benefit from
 -	  accordingly optimized code. Use a recent GCC with specific Atom
 -	  support in order to fully benefit from selecting this option.
++config MNEHALEM
++	bool "Intel Nehalem"
++	select X86_P6_NOP
++	---help---
++
 +	  Select this for 1st Gen Core processors in the Nehalem family.
 +
 +	  Enables -march=nehalem
@@ -317,7 +316,7 @@ diff -up linux-5.5-rc1/arch/x86/Kconfig.cpu.17~ linux-5.5-rc1/arch/x86/Kconfig.c
  
  config GENERIC_CPU
  	bool "Generic-x86-64"
-@@ -294,6 +469,19 @@ config GENERIC_CPU
+@@ -294,6 +470,19 @@ config GENERIC_CPU
  	  Generic x86-64 CPU.
  	  Run equally well on all x86-64 CPUs.
  
@@ -337,7 +336,7 @@ diff -up linux-5.5-rc1/arch/x86/Kconfig.cpu.17~ linux-5.5-rc1/arch/x86/Kconfig.c
  endchoice
  
  config X86_GENERIC
-@@ -318,7 +506,7 @@ config X86_INTERNODE_CACHE_SHIFT
+@@ -318,7 +507,7 @@ config X86_INTERNODE_CACHE_SHIFT
  config X86_L1_CACHE_SHIFT
  	int
  	default "7" if MPENTIUM4 || MPSC
@@ -346,7 +345,7 @@ diff -up linux-5.5-rc1/arch/x86/Kconfig.cpu.17~ linux-5.5-rc1/arch/x86/Kconfig.c
  	default "4" if MELAN || M486SX || M486 || MGEODEGX1
  	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
  
-@@ -336,35 +524,36 @@ config X86_ALIGNMENT_16
+@@ -336,35 +525,36 @@ config X86_ALIGNMENT_16
  
  config X86_INTEL_USERCOPY
  	def_bool y
@@ -400,7 +399,7 @@ diff -up linux-5.5-rc1/arch/x86/Kconfig.cpu.17~ linux-5.5-rc1/arch/x86/Kconfig.c
  
  config X86_CMPXCHG64
  	def_bool y
-@@ -374,7 +563,7 @@ config X86_CMPXCHG64
+@@ -374,7 +564,7 @@ config X86_CMPXCHG64
  # generates cmov.
  config X86_CMOV
  	def_bool y
@@ -409,9 +408,9 @@ diff -up linux-5.5-rc1/arch/x86/Kconfig.cpu.17~ linux-5.5-rc1/arch/x86/Kconfig.c
  
  config X86_MINIMUM_CPU_FAMILY
  	int
-diff -up linux-5.5-rc1/arch/x86/Makefile.17~ linux-5.5-rc1/arch/x86/Makefile
---- linux-5.5-rc1/arch/x86/Makefile.17~	2019-12-08 23:57:55.000000000 +0100
-+++ linux-5.5-rc1/arch/x86/Makefile	2019-12-10 17:00:36.851313621 +0100
+diff -up linux-5.8-rc1/arch/x86/Makefile.19~ linux-5.8-rc1/arch/x86/Makefile
+--- linux-5.8-rc1/arch/x86/Makefile.19~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/Makefile	2020-06-16 21:52:41.132824421 +0200
 @@ -119,13 +119,46 @@ else
  	KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
  
@@ -462,9 +461,9 @@ diff -up linux-5.5-rc1/arch/x86/Makefile.17~ linux-5.5-rc1/arch/x86/Makefile
          cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
          KBUILD_CFLAGS += $(cflags-y)
  
-diff -up linux-5.5-rc1/arch/x86/Makefile_32.cpu.17~ linux-5.5-rc1/arch/x86/Makefile_32.cpu
---- linux-5.5-rc1/arch/x86/Makefile_32.cpu.17~	2019-12-08 23:57:55.000000000 +0100
-+++ linux-5.5-rc1/arch/x86/Makefile_32.cpu	2019-12-10 17:00:36.851313621 +0100
+diff -up linux-5.8-rc1/arch/x86/Makefile_32.cpu.19~ linux-5.8-rc1/arch/x86/Makefile_32.cpu
+--- linux-5.8-rc1/arch/x86/Makefile_32.cpu.19~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/Makefile_32.cpu	2020-06-16 21:52:41.133824431 +0200
 @@ -24,7 +24,18 @@ cflags-$(CONFIG_MK6)		+= -march=k6
  # Please note, that patches that add -march=athlon-xp and friends are pointless.
  # They make zero difference whatsosever to performance at this time.
diff --git a/android-enable-building-ashmem-and-binder-as-modules.patch b/android-enable-building-ashmem-and-binder-as-modules.patch
index 05ad8f8..6bc176f 100644
--- a/android-enable-building-ashmem-and-binder-as-modules.patch
+++ b/android-enable-building-ashmem-and-binder-as-modules.patch
@@ -1,6 +1,6 @@
-diff -up linux-5.6-rc3/drivers/android/binder_alloc.c.22~ linux-5.6-rc3/drivers/android/binder_alloc.c
---- linux-5.6-rc3/drivers/android/binder_alloc.c.22~	2020-02-24 01:17:42.000000000 +0100
-+++ linux-5.6-rc3/drivers/android/binder_alloc.c	2020-02-24 23:26:47.054727484 +0100
+diff -up linux-5.8-rc1/drivers/android/binder_alloc.c.20~ linux-5.8-rc1/drivers/android/binder_alloc.c
+--- linux-5.8-rc1/drivers/android/binder_alloc.c.20~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/drivers/android/binder_alloc.c	2020-06-16 22:04:12.096569623 +0200
 @@ -38,7 +38,7 @@ enum {
  };
  static uint32_t binder_alloc_debug_mask = BINDER_DEBUG_USER_ERROR;
@@ -10,9 +10,9 @@ diff -up linux-5.6-rc3/drivers/android/binder_alloc.c.22~ linux-5.6-rc3/drivers/
  		   uint, 0644);
  
  #define binder_alloc_debug(mask, x...) \
-diff -up linux-5.6-rc3/drivers/android/Kconfig.22~ linux-5.6-rc3/drivers/android/Kconfig
---- linux-5.6-rc3/drivers/android/Kconfig.22~	2020-02-24 01:17:42.000000000 +0100
-+++ linux-5.6-rc3/drivers/android/Kconfig	2020-02-24 23:26:47.054727484 +0100
+diff -up linux-5.8-rc1/drivers/android/Kconfig.20~ linux-5.8-rc1/drivers/android/Kconfig
+--- linux-5.8-rc1/drivers/android/Kconfig.20~	2020-06-16 22:04:12.096569623 +0200
++++ linux-5.8-rc1/drivers/android/Kconfig	2020-06-16 22:05:07.409779391 +0200
 @@ -9,7 +9,7 @@ config ANDROID
  if ANDROID
  
@@ -21,10 +21,10 @@ diff -up linux-5.6-rc3/drivers/android/Kconfig.22~ linux-5.6-rc3/drivers/android
 +	tristate "Android Binder IPC Driver"
  	depends on MMU
  	default n
- 	---help---
-diff -up linux-5.6-rc3/drivers/android/Makefile.22~ linux-5.6-rc3/drivers/android/Makefile
---- linux-5.6-rc3/drivers/android/Makefile.22~	2020-02-24 01:17:42.000000000 +0100
-+++ linux-5.6-rc3/drivers/android/Makefile	2020-02-24 23:26:47.054727484 +0100
+ 	help
+diff -up linux-5.8-rc1/drivers/android/Makefile.20~ linux-5.8-rc1/drivers/android/Makefile
+--- linux-5.8-rc1/drivers/android/Makefile.20~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/drivers/android/Makefile	2020-06-16 22:04:12.096569623 +0200
 @@ -1,6 +1,7 @@
  # SPDX-License-Identifier: GPL-2.0-only
  ccflags-y += -I$(src)			# needed for trace events
@@ -36,9 +36,9 @@ diff -up linux-5.6-rc3/drivers/android/Makefile.22~ linux-5.6-rc3/drivers/androi
 +binder_linux-y := binder.o binder_alloc.o
 +binder_linux-$(CONFIG_ANDROID_BINDERFS)	+= binderfs.o
 +binder_linux-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
-diff -up linux-5.6-rc3/drivers/staging/android/ashmem.c.22~ linux-5.6-rc3/drivers/staging/android/ashmem.c
---- linux-5.6-rc3/drivers/staging/android/ashmem.c.22~	2020-02-24 01:17:42.000000000 +0100
-+++ linux-5.6-rc3/drivers/staging/android/ashmem.c	2020-02-24 23:26:47.054727484 +0100
+diff -up linux-5.8-rc1/drivers/staging/android/ashmem.c.20~ linux-5.8-rc1/drivers/staging/android/ashmem.c
+--- linux-5.8-rc1/drivers/staging/android/ashmem.c.20~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/drivers/staging/android/ashmem.c	2020-06-16 22:04:12.096569623 +0200
 @@ -24,6 +24,7 @@
  #include <linux/bitops.h>
  #include <linux/mutex.h>
@@ -53,9 +53,9 @@ diff -up linux-5.6-rc3/drivers/staging/android/ashmem.c.22~ linux-5.6-rc3/driver
  device_initcall(ashmem_init);
 +
 +MODULE_LICENSE("GPL v2");
-diff -up linux-5.6-rc3/drivers/staging/android/Kconfig.22~ linux-5.6-rc3/drivers/staging/android/Kconfig
---- linux-5.6-rc3/drivers/staging/android/Kconfig.22~	2020-02-24 01:17:42.000000000 +0100
-+++ linux-5.6-rc3/drivers/staging/android/Kconfig	2020-02-24 23:26:47.054727484 +0100
+diff -up linux-5.8-rc1/drivers/staging/android/Kconfig.20~ linux-5.8-rc1/drivers/staging/android/Kconfig
+--- linux-5.8-rc1/drivers/staging/android/Kconfig.20~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/drivers/staging/android/Kconfig	2020-06-16 22:04:12.096569623 +0200
 @@ -4,7 +4,7 @@ menu "Android"
  if ANDROID
  
@@ -65,9 +65,9 @@ diff -up linux-5.6-rc3/drivers/staging/android/Kconfig.22~ linux-5.6-rc3/drivers
  	depends on SHMEM
  	help
  	  The ashmem subsystem is a new shared memory allocator, similar to
-diff -up linux-5.6-rc3/drivers/staging/android/Makefile.22~ linux-5.6-rc3/drivers/staging/android/Makefile
---- linux-5.6-rc3/drivers/staging/android/Makefile.22~	2020-02-24 23:26:47.054727484 +0100
-+++ linux-5.6-rc3/drivers/staging/android/Makefile	2020-02-24 23:27:56.058734039 +0100
+diff -up linux-5.8-rc1/drivers/staging/android/Makefile.20~ linux-5.8-rc1/drivers/staging/android/Makefile
+--- linux-5.8-rc1/drivers/staging/android/Makefile.20~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/drivers/staging/android/Makefile	2020-06-16 22:04:12.096569623 +0200
 @@ -3,4 +3,6 @@ ccflags-y += -I$(src)			# needed for tra
  
  obj-y					+= ion/
diff --git a/export-symbols-needed-by-android-drivers.patch b/export-symbols-needed-by-android-drivers.patch
index 437a37e..5af2b24 100644
--- a/export-symbols-needed-by-android-drivers.patch
+++ b/export-symbols-needed-by-android-drivers.patch
@@ -1,31 +1,7 @@
-From: Ben Hutchings <ben@decadent.org.uk>
-Date: Tue, 26 Jun 2018 16:59:01 +0100
-Subject: Export symbols needed by Android drivers
-Bug-Debian: https://bugs.debian.org/901492
-
-We want to enable use of the Android ashmem and binder drivers to
-support Anbox, but they should not be built-in as that would waste
-resources and increase security attack surface on systems that don't
-need them.
-
-Export the currently un-exported symbols they depend on.
-
----
- fs/file.c           | 5 +++++
- kernel/fork.c       | 1 +
- kernel/sched/core.c | 1 +
- kernel/signal.c     | 1 +
- kernel/task_work.c  | 1 +
- mm/memory.c         | 1 +
- mm/shmem.c          | 1 +
- mm/vmalloc.c        | 2 ++
- security/security.c | 4 ++++
- 9 files changed, 17 insertions(+)
-
-diff -uraN linux-5.7.8/fs/file.c omv-linux-5.7.8/fs/file.c
---- linux-5.7.8/fs/file.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/fs/file.c	2020-07-13 15:14:49.188263989 +0200
-@@ -409,6 +409,7 @@
+diff -up linux-5.8-rc4/fs/file.c.19~ linux-5.8-rc4/fs/file.c
+--- linux-5.8-rc4/fs/file.c.19~	2020-07-06 01:20:22.000000000 +0200
++++ linux-5.8-rc4/fs/file.c	2020-07-06 16:29:44.030778649 +0200
+@@ -409,6 +409,7 @@ struct files_struct *get_files_struct(st
  
  	return files;
  }
@@ -33,7 +9,7 @@ diff -uraN linux-5.7.8/fs/file.c omv-linux-5.7.8/fs/file.c
  
  void put_files_struct(struct files_struct *files)
  {
-@@ -421,6 +422,7 @@
+@@ -421,6 +422,7 @@ void put_files_struct(struct files_struc
  		kmem_cache_free(files_cachep, files);
  	}
  }
@@ -41,7 +17,7 @@ diff -uraN linux-5.7.8/fs/file.c omv-linux-5.7.8/fs/file.c
  
  void reset_files_struct(struct files_struct *files)
  {
-@@ -534,6 +536,7 @@
+@@ -534,6 +536,7 @@ out:
  	spin_unlock(&files->file_lock);
  	return error;
  }
@@ -49,7 +25,7 @@ diff -uraN linux-5.7.8/fs/file.c omv-linux-5.7.8/fs/file.c
  
  static int alloc_fd(unsigned start, unsigned flags)
  {
-@@ -612,6 +615,7 @@
+@@ -612,6 +615,7 @@ void __fd_install(struct files_struct *f
  	rcu_assign_pointer(fdt->fd[fd], file);
  	rcu_read_unlock_sched();
  }
@@ -57,7 +33,7 @@ diff -uraN linux-5.7.8/fs/file.c omv-linux-5.7.8/fs/file.c
  
  void fd_install(unsigned int fd, struct file *file)
  {
-@@ -676,6 +680,7 @@
+@@ -676,6 +680,7 @@ out_unlock:
  	*res = NULL;
  	return -ENOENT;
  }
@@ -65,10 +41,10 @@ diff -uraN linux-5.7.8/fs/file.c omv-linux-5.7.8/fs/file.c
  
  void do_close_on_exec(struct files_struct *files)
  {
-diff -uraN linux-5.7.8/kernel/fork.c omv-linux-5.7.8/kernel/fork.c
---- linux-5.7.8/kernel/fork.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/kernel/fork.c	2020-07-13 15:15:57.177994389 +0200
-@@ -1123,6 +1123,7 @@
+diff -up linux-5.8-rc4/kernel/fork.c.19~ linux-5.8-rc4/kernel/fork.c
+--- linux-5.8-rc4/kernel/fork.c.19~	2020-07-06 01:20:22.000000000 +0200
++++ linux-5.8-rc4/kernel/fork.c	2020-07-06 16:29:44.030778649 +0200
+@@ -1131,6 +1131,7 @@ void mmput_async(struct mm_struct *mm)
  		schedule_work(&mm->async_put_work);
  	}
  }
@@ -76,10 +52,10 @@ diff -uraN linux-5.7.8/kernel/fork.c omv-linux-5.7.8/kernel/fork.c
  #endif
  
  /**
-diff -uraN linux-5.7.8/kernel/sched/core.c omv-linux-5.7.8/kernel/sched/core.c
---- linux-5.7.8/kernel/sched/core.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/kernel/sched/core.c	2020-07-13 15:16:48.193591744 +0200
-@@ -4572,6 +4572,7 @@
+diff -up linux-5.8-rc4/kernel/sched/core.c.19~ linux-5.8-rc4/kernel/sched/core.c
+--- linux-5.8-rc4/kernel/sched/core.c.19~	2020-07-06 01:20:22.000000000 +0200
++++ linux-5.8-rc4/kernel/sched/core.c	2020-07-06 16:29:44.030778649 +0200
+@@ -4667,6 +4667,7 @@ int can_nice(const struct task_struct *p
  	return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
  		capable(CAP_SYS_NICE));
  }
@@ -87,10 +63,10 @@ diff -uraN linux-5.7.8/kernel/sched/core.c omv-linux-5.7.8/kernel/sched/core.c
  
  #ifdef __ARCH_WANT_SYS_NICE
  
-diff -uraN linux-5.7.8/kernel/signal.c omv-linux-5.7.8/kernel/signal.c
---- linux-5.7.8/kernel/signal.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/kernel/signal.c	2020-07-13 15:18:04.854196194 +0200
-@@ -1396,6 +1396,7 @@
+diff -up linux-5.8-rc4/kernel/signal.c.19~ linux-5.8-rc4/kernel/signal.c
+--- linux-5.8-rc4/kernel/signal.c.19~	2020-07-06 01:20:22.000000000 +0200
++++ linux-5.8-rc4/kernel/signal.c	2020-07-06 16:29:44.031778664 +0200
+@@ -1396,6 +1396,7 @@ struct sighand_struct *__lock_task_sigha
  
  	return sighand;
  }
@@ -98,10 +74,10 @@ diff -uraN linux-5.7.8/kernel/signal.c omv-linux-5.7.8/kernel/signal.c
  
  /*
   * send signal info to all the members of a group
-diff -uraN linux-5.7.8/kernel/task_work.c omv-linux-5.7.8/kernel/task_work.c
---- linux-5.7.8/kernel/task_work.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/kernel/task_work.c	2020-07-13 15:18:57.298331300 +0200
-@@ -52,6 +52,7 @@
+diff -up linux-5.8-rc4/kernel/task_work.c.19~ linux-5.8-rc4/kernel/task_work.c
+--- linux-5.8-rc4/kernel/task_work.c.19~	2020-07-06 16:29:44.031778664 +0200
++++ linux-5.8-rc4/kernel/task_work.c	2020-07-06 16:31:08.466001784 +0200
+@@ -52,6 +52,7 @@ task_work_add(struct task_struct *task,
  
  	return 0;
  }
@@ -109,10 +85,10 @@ diff -uraN linux-5.7.8/kernel/task_work.c omv-linux-5.7.8/kernel/task_work.c
  
  /**
   * task_work_cancel - cancel a pending work added by task_work_add()
-diff -uraN linux-5.7.8/mm/memory.c omv-linux-5.7.8/mm/memory.c
---- linux-5.7.8/mm/memory.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/mm/memory.c	2020-07-13 15:19:48.003475257 +0200
-@@ -1370,6 +1370,7 @@
+diff -up linux-5.8-rc4/mm/memory.c.19~ linux-5.8-rc4/mm/memory.c
+--- linux-5.8-rc4/mm/memory.c.19~	2020-07-06 01:20:22.000000000 +0200
++++ linux-5.8-rc4/mm/memory.c	2020-07-06 16:29:44.031778664 +0200
+@@ -1367,6 +1367,7 @@ void zap_page_range(struct vm_area_struc
  	mmu_notifier_invalidate_range_end(&range);
  	tlb_finish_mmu(&tlb, start, range.end);
  }
@@ -120,10 +96,10 @@ diff -uraN linux-5.7.8/mm/memory.c omv-linux-5.7.8/mm/memory.c
  
  /**
   * zap_page_range_single - remove user pages in a given range
-diff -uraN linux-5.7.8/mm/shmem.c omv-linux-5.7.8/mm/shmem.c
---- linux-5.7.8/mm/shmem.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/mm/shmem.c	2020-07-13 15:20:27.829371644 +0200
-@@ -4185,6 +4185,7 @@
+diff -up linux-5.8-rc4/mm/shmem.c.19~ linux-5.8-rc4/mm/shmem.c
+--- linux-5.8-rc4/mm/shmem.c.19~	2020-07-06 01:20:22.000000000 +0200
++++ linux-5.8-rc4/mm/shmem.c	2020-07-06 16:29:44.031778664 +0200
+@@ -4158,6 +4158,7 @@ int shmem_zero_setup(struct vm_area_stru
  
  	return 0;
  }
@@ -131,18 +107,18 @@ diff -uraN linux-5.7.8/mm/shmem.c omv-linux-5.7.8/mm/shmem.c
  
  /**
   * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
-diff -uraN linux-5.7.8/mm/vmalloc.c omv-linux-5.7.8/mm/vmalloc.c
---- linux-5.7.8/mm/vmalloc.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/mm/vmalloc.c	2020-07-13 15:22:00.544919570 +0200
-@@ -2011,6 +2011,7 @@
- {
- 	return vmap_page_range_noflush(addr, addr + size, prot, pages);
+diff -up linux-5.8-rc4/mm/vmalloc.c.19~ linux-5.8-rc4/mm/vmalloc.c
+--- linux-5.8-rc4/mm/vmalloc.c.19~	2020-07-06 01:20:22.000000000 +0200
++++ linux-5.8-rc4/mm/vmalloc.c	2020-07-06 16:29:44.032778679 +0200
+@@ -315,6 +315,7 @@ int map_kernel_range_noflush(unsigned lo
+ 
+ 	return 0;
  }
 +EXPORT_SYMBOL_GPL(map_kernel_range_noflush);
  
- /**
-  * unmap_kernel_range_noflush - unmap kernel VM area
-@@ -2161,6 +2162,7 @@
+ int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
+ 		struct page **pages)
+@@ -2147,6 +2148,7 @@ struct vm_struct *get_vm_area(unsigned l
  				  NUMA_NO_NODE, GFP_KERNEL,
  				  __builtin_return_address(0));
  }
@@ -150,10 +126,10 @@ diff -uraN linux-5.7.8/mm/vmalloc.c omv-linux-5.7.8/mm/vmalloc.c
  
  struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
  				const void *caller)
-diff -uraN linux-5.7.8/security/security.c omv-linux-5.7.8/security/security.c
---- linux-5.7.8/security/security.c	2020-07-09 09:39:40.000000000 +0200
-+++ omv-linux-5.7.8/security/security.c	2020-07-13 15:23:59.601954504 +0200
-@@ -725,24 +725,28 @@
+diff -up linux-5.8-rc4/security/security.c.19~ linux-5.8-rc4/security/security.c
+--- linux-5.8-rc4/security/security.c.19~	2020-07-06 01:20:22.000000000 +0200
++++ linux-5.8-rc4/security/security.c	2020-07-06 16:29:44.032778679 +0200
+@@ -725,24 +725,28 @@ int security_binder_set_context_mgr(stru
  {
  	return call_int_hook(binder_set_context_mgr, 0, mgr);
  }
diff --git a/fix_virtualbox.patch b/fix_virtualbox.patch
new file mode 100644
index 0000000..9197b88
--- /dev/null
+++ b/fix_virtualbox.patch
@@ -0,0 +1,20 @@
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index 3091c2ca60df..f027bc7d2534 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -325,6 +325,7 @@ int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
+ 	flush_cache_vmap(start, start + size);
+ 	return ret;
+ }
++EXPORT_SYMBOL(map_kernel_range);
+ 
+ int is_vmalloc_or_module_addr(const void *x)
+ {
+@@ -2130,6 +2131,7 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
+ 	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
+ 				  GFP_KERNEL, caller);
+ }
++EXPORT_SYMBOL(__get_vm_area_caller);
+ 
+ /**
+  * get_vm_area - reserve a contiguous kernel virtual area
diff --git a/fixes_for_changes_in_cpu_tlbstate.patch b/fixes_for_changes_in_cpu_tlbstate.patch
new file mode 100644
index 0000000..134847d
--- /dev/null
+++ b/fixes_for_changes_in_cpu_tlbstate.patch
@@ -0,0 +1,25 @@
+Index: linux-5.8-rc2/drivers/virt/vboxdrv/linux/SUPDrv-linux.c
+===================================================================
+--- linux-5.8-rc2/drivers/virt/vboxdrv/linux/SUPDrv-linux.c
++++ linux-5.8-rc2/drivers/virt/vboxdrv/linux/SUPDrv-linux.c
+@@ -763,12 +763,19 @@ EXPORT_SYMBOL(SUPDrvLinuxIDC);
+ RTCCUINTREG VBOXCALL supdrvOSChangeCR4(RTCCUINTREG fOrMask, RTCCUINTREG fAndMask)
+ {
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 20, 0)
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+     RTCCUINTREG uOld = this_cpu_read(cpu_tlbstate.cr4);
++#else
++    RTCCUINTREG uOld = __read_cr4();
++#endif
+     RTCCUINTREG uNew = (uOld & fAndMask) | fOrMask;
+     if (uNew != uOld)
+     {
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+         this_cpu_write(cpu_tlbstate.cr4, uNew);
+         __write_cr4(uNew);
++#endif
++        ASMSetCR4(uNew);
+     }
+ #else
+     RTCCUINTREG uOld = ASMGetCR4();
+
diff --git a/fixes_for_mm_struct.patch b/fixes_for_mm_struct.patch
new file mode 100644
index 0000000..5f3d900
--- /dev/null
+++ b/fixes_for_mm_struct.patch
@@ -0,0 +1,161 @@
+Index: linux-5.8-rc2/drivers/virt/vboxdrv/r0drv/linux/memobj-r0drv-linux.c
+===================================================================
+--- linux-5.8-rc2/drivers/virt/vboxdrv/r0drv/linux/memobj-r0drv-linux.c
++++ linux-5.8-rc2/drivers/virt/vboxdrv/r0drv/linux/memobj-r0drv-linux.c
+@@ -222,9 +222,17 @@ static void *rtR0MemObjLinuxDoMmap(RTR3P
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+         ulAddr = vm_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
+ #else
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+         down_write(&pTask->mm->mmap_sem);
++#else
++        down_write(&pTask->mm->mmap_lock);
++#endif
+         ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+         up_write(&pTask->mm->mmap_sem);
++#else
++        up_write(&pTask->mm->mmap_lock);
++#endif
+ #endif
+     }
+     else
+@@ -232,9 +240,17 @@ static void *rtR0MemObjLinuxDoMmap(RTR3P
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+         ulAddr = vm_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
+ #else
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+         down_write(&pTask->mm->mmap_sem);
++#else
++        down_write(&pTask->mm->mmap_lock);
++#endif
+         ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+         up_write(&pTask->mm->mmap_sem);
++#else
++        up_write(&pTask->mm->mmap_lock);
++#endif
+ #endif
+         if (    !(ulAddr & ~PAGE_MASK)
+             &&  (ulAddr & (uAlignment - 1)))
+@@ -269,13 +285,29 @@ static void rtR0MemObjLinuxDoMunmap(void
+     Assert(pTask == current); RT_NOREF_PV(pTask);
+     vm_munmap((unsigned long)pv, cb);
+ #elif defined(USE_RHEL4_MUNMAP)
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+     down_write(&pTask->mm->mmap_sem);
++#else
++    down_write(&pTask->mm->mmap_lock);
++#endif
+     do_munmap(pTask->mm, (unsigned long)pv, cb, 0); /* should it be 1 or 0? */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+     up_write(&pTask->mm->mmap_sem);
+ #else
++    up_write(&pTask->mm->mmap_lock);
++#endif
++#else
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+     down_write(&pTask->mm->mmap_sem);
++#else
++    down_write(&pTask->mm->mmap_lock);
++#endif
+     do_munmap(pTask->mm, (unsigned long)pv, cb);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+     up_write(&pTask->mm->mmap_sem);
++#else
++    up_write(&pTask->mm->mmap_lock);
++#endif
+ #endif
+ }
+ 
+@@ -593,7 +625,11 @@ DECLHIDDEN(int) rtR0MemObjNativeFree(RTR
+                 size_t              iPage;
+                 Assert(pTask);
+                 if (pTask && pTask->mm)
+-                    down_read(&pTask->mm->mmap_sem);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
++		    down_read(&pTask->mm->mmap_sem);
++#else
++		    down_read(&pTask->mm->mmap_lock);
++#endif
+ 
+                 iPage = pMemLnx->cPages;
+                 while (iPage-- > 0)
+@@ -608,7 +644,11 @@ DECLHIDDEN(int) rtR0MemObjNativeFree(RTR
+                 }
+ 
+                 if (pTask && pTask->mm)
+-                    up_read(&pTask->mm->mmap_sem);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
++		    up_read(&pTask->mm->mmap_sem);
++#else
++		    up_read(&pTask->mm->mmap_lock);
++#endif
+             }
+             /* else: kernel memory - nothing to do here. */
+             break;
+@@ -1076,7 +1116,11 @@ DECLHIDDEN(int) rtR0MemObjNativeLockUser
+     papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
+     if (papVMAs)
+     {
+-        down_read(&pTask->mm->mmap_sem);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
++	down_read(&pTask->mm->mmap_sem);
++#else
++	down_read(&pTask->mm->mmap_lock);
++#endif
+ 
+         /*
+          * Get user pages.
+@@ -1162,7 +1206,11 @@ DECLHIDDEN(int) rtR0MemObjNativeLockUser
+                 papVMAs[rc]->vm_flags |= VM_DONTCOPY | VM_LOCKED;
+             }
+ 
+-            up_read(&pTask->mm->mmap_sem);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
++	    up_read(&pTask->mm->mmap_sem);
++#else
++	    up_read(&pTask->mm->mmap_lock);
++#endif
+ 
+             RTMemFree(papVMAs);
+ 
+@@ -1189,7 +1237,11 @@ DECLHIDDEN(int) rtR0MemObjNativeLockUser
+ #endif
+         }
+ 
+-        up_read(&pTask->mm->mmap_sem);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
++	up_read(&pTask->mm->mmap_sem);
++#else
++	up_read(&pTask->mm->mmap_lock);
++#endif
+ 
+         RTMemFree(papVMAs);
+         rc = VERR_LOCK_FAILED;
+@@ -1604,7 +1656,11 @@ DECLHIDDEN(int) rtR0MemObjNativeMapUser(
+             const size_t    cPages    = (offSub + cbSub) >> PAGE_SHIFT;
+             size_t          iPage;
+ 
+-            down_write(&pTask->mm->mmap_sem);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
++	    down_write(&pTask->mm->mmap_sem);
++#else
++	    down_write(&pTask->mm->mmap_lock);
++#endif
+ 
+             rc = VINF_SUCCESS;
+             if (pMemLnxToMap->cPages)
+@@ -1721,7 +1777,11 @@ DECLHIDDEN(int) rtR0MemObjNativeMapUser(
+             }
+ #endif /* CONFIG_NUMA_BALANCING */
+ 
+-            up_write(&pTask->mm->mmap_sem);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
++	    up_write(&pTask->mm->mmap_sem);
++#else
++	    up_write(&pTask->mm->mmap_lock);
++#endif
+ 
+             if (RT_SUCCESS(rc))
+             {
diff --git a/fixes_for_module_memory.patch b/fixes_for_module_memory.patch
new file mode 100644
index 0000000..8b55375
--- /dev/null
+++ b/fixes_for_module_memory.patch
@@ -0,0 +1,51 @@
+Index: linux-5.8-rc2/drivers/virt/vboxdrv/r0drv/linux/alloc-r0drv-linux.c
+===================================================================
+--- linux-5.8-rc2/drivers/virt/vboxdrv/r0drv/linux/alloc-r0drv-linux.c
++++ linux-5.8-rc2/drivers/virt/vboxdrv/r0drv/linux/alloc-r0drv-linux.c
+@@ -153,6 +153,8 @@ RT_EXPORT_SYMBOL(RTR0MemExecDonate);
+ 
+ 
+ #ifdef RTMEMALLOC_EXEC_VM_AREA
++
++
+ /**
+  * Allocate executable kernel memory in the module range.
+  *
+@@ -168,7 +170,12 @@ static PRTMEMHDR rtR0MemAllocExecVmArea(
+     struct vm_struct   *pVmArea;
+     size_t              iPage;
+ 
++# if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
++    pVmArea = __get_vm_area_caller(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END,
++		    		   __builtin_return_address(0));
++#else
+     pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
++#endif
+     if (!pVmArea)
+         return NULL;
+     pVmArea->nr_pages = 0;    /* paranoia? */
+@@ -201,14 +208,21 @@ static PRTMEMHDR rtR0MemAllocExecVmArea(
+ # endif
+         pVmArea->nr_pages = cPages;
+         pVmArea->pages    = papPages;
+-        if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC,
++# if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
++	unsigned long start = (unsigned long)pVmArea->addr;
++	unsigned long size = get_vm_area_size(pVmArea);
++
++	if (!map_kernel_range(start, size, PAGE_KERNEL_EXEC, papPages))
++#else
++	if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC,
+ # if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+                          &papPagesIterator
+ # else
+                          papPages
+ # endif
+                          ))
+-        {
++#endif
++	{
+             PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
+             pHdrEx->pVmArea     = pVmArea;
+             pHdrEx->pvDummy     = NULL;
+
diff --git a/kernel-5.7-fewer-conditions-for-ARM64_PTR_AUTH.patch b/kernel-5.7-fewer-conditions-for-ARM64_PTR_AUTH.patch
index 63ed69a..4a037ab 100644
--- a/kernel-5.7-fewer-conditions-for-ARM64_PTR_AUTH.patch
+++ b/kernel-5.7-fewer-conditions-for-ARM64_PTR_AUTH.patch
@@ -9,17 +9,17 @@ Given our toolchain is known good (we'll always have binutils >= 2.33.1 and
 current versions of gcc and clang), inside our package disabling those
 preconditions doesn't do any harm.
 
-diff -up linux-5.7/arch/arm64/Kconfig.omv~ linux-5.7/arch/arm64/Kconfig
---- linux-5.7/arch/arm64/Kconfig.omv~	2020-07-25 23:31:02.229462957 +0000
-+++ linux-5.7/arch/arm64/Kconfig	2020-07-25 23:31:12.161907200 +0000
-@@ -1502,12 +1502,6 @@ config ARM64_PTR_AUTH
+diff -up linux-5.8-rc6/arch/arm64/Kconfig.9~ linux-5.8-rc6/arch/arm64/Kconfig
+--- linux-5.8-rc6/arch/arm64/Kconfig.9~	2020-07-26 17:23:47.979426682 +0200
++++ linux-5.8-rc6/arch/arm64/Kconfig	2020-07-26 17:25:15.024771232 +0200
+@@ -1517,12 +1517,6 @@ config ARM64_PTR_AUTH
  	bool "Enable support for pointer authentication"
  	default y
  	depends on !KVM || ARM64_VHE
 -	depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
--	# GCC 9.1 and later inserts a .note.gnu.property section note for PAC
+-	# Modern compilers insert a .note.gnu.property section note for PAC
 -	# which is only understood by binutils starting with version 2.33.1.
--	depends on !CC_IS_GCC || GCC_VERSION < 90100 || LD_VERSION >= 233010000
+-	depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100)
 -	depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
 -	depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
  	help
diff --git a/kernel-release.spec b/kernel-release.spec
index fe33307..9b2211e 100644
--- a/kernel-release.spec
+++ b/kernel-release.spec
@@ -17,8 +17,8 @@
 # This is the place where you set kernel version i.e 4.5.0
 # compose tar.xz name and release
 %define kernelversion	5
-%define patchlevel	7
-%define sublevel	11
+%define patchlevel	8
+%define sublevel	0
 %define relc		%{nil}
 # Only ever wrong on x.0 releases...
 %define previous	%{kernelversion}.%(echo $((%{patchlevel}-1)))
@@ -32,7 +32,7 @@
 %define rpmrel		0.rc%{relc}.1
 %define tar_ver		%{kernelversion}.%{patchlevel}-rc%{relc}
 %else
-%define rpmrel		2
+%define rpmrel		1
 %define tar_ver		%{kernelversion}.%{patchlevel}
 %endif
 %define buildrpmrel	%{rpmrel}%{rpmtag}
@@ -81,13 +81,12 @@
 %bcond_with dracut_all_initrd
 # (tpg) enable patches from ClearLinux
 %bcond_without clr
-%if %mdvver > 3000000
 %bcond_without cross_headers
-%else
-%bcond_with cross_headers
-%endif
+# FIXME re-enable by default when the patches have been adapted to 5.8
+%bcond_with saa716x
+%bcond_with rtl8821ce
 
-%global cross_header_archs	aarch64-linux armv7hnl-linux i686-linux x86_64-linux x32-linux riscv32-linux riscv64-linux aarch64-linuxmusl armv7hnl-linuxmusl i686-linuxmusl x86_64-linuxmusl x32-linuxmusl riscv32-linuxmusl riscv64-linuxmusl aarch64-android armv7l-android armv8l-android x86_64-android aarch64-linuxuclibc armv7hnl-linuxuclibc i686-linuxuclibc x86_64-linuxuclibc x32-linuxuclibc riscv32-linuxuclibc riscv64-linuxuclibc
+%global cross_header_archs	aarch64-linux armv7hnl-linux i686-linux x86_64-linux x32-linux riscv32-linux riscv64-linux aarch64-linuxmusl armv7hnl-linuxmusl i686-linuxmusl x86_64-linuxmusl x32-linuxmusl riscv32-linuxmusl riscv64-linuxmusl aarch64-android armv7l-android armv8l-android x86_64-android aarch64-linuxuclibc armv7hnl-linuxuclibc i686-linuxuclibc x86_64-linuxuclibc x32-linuxuclibc riscv32-linuxuclibc riscv64-linuxuclibc ppc64le-linux ppc64-linux
 %global long_cross_header_archs %(
 	for i in %{cross_header_archs}; do
 		CPU=$(echo $i |cut -d- -f1)
@@ -167,8 +166,7 @@
 	&& RPM_BUILD_NCPUS="`/usr/bin/getconf _NPROCESSORS_ONLN`"; \\\
 	[ "$RPM_BUILD_NCPUS" -gt 1 ] && echo "-P $RPM_BUILD_NCPUS")
 
-# Sparc arch wants sparc64 kernels
-%define target_arch %(echo %{_arch} | sed -e 's/mips.*/mips/' -e 's/arm.*/arm/' -e 's/aarch64/arm64/' -e 's/x86_64/x86/' -e 's/i.86/x86/' -e 's/znver1/x86/' -e 's/riscv.*/riscv/')
+%define target_arch %(echo %{_arch} | sed -e 's/mips.*/mips/' -e 's/arm.*/arm/' -e 's/aarch64/arm64/' -e 's/x86_64/x86/' -e 's/i.86/x86/' -e 's/znver1/x86/' -e 's/riscv.*/riscv/' -e 's/ppc.*/powerpc/')
 
 #
 # SRC RPM description
@@ -211,6 +209,7 @@ Source10:	i386-common.config
 Source11:	arm64-common.config
 Source12:	arm-common.config
 Source13:	znver1-common.config
+Source14:	powerpc-common.config
 # Files called $ARCH-$FLAVOR.config are merged as well,
 # currently there's no need to have specific overloads there.
 
@@ -261,7 +260,7 @@ Source101:      9d55bebd9816903b821a403a69a94190442ac043.patch
 #  it cannot be re-licensed to GPL3 by random patches.
 %if %{with uksm}
 # brokes armx builds
-Patch120:	https://raw.githubusercontent.com/dolohow/uksm/master/v5.x/uksm-5.7.patch
+Patch120:	https://raw.githubusercontent.com/dolohow/uksm/master/v5.x/uksm-5.8.patch
 %endif
 
 %if %{with build_modzstd}
@@ -305,6 +304,11 @@ Patch202:	extra-wifi-drivers-port-to-5.6.patch
 # because they need to be applied after stuff from the
 # virtualbox-kernel-module-sources package is copied around
 Source301:	vbox-6.1-fix-build-on-znver1-hosts.patch
+# Re-export a few symbols vbox wants
+Patch301:	https://gitweb.frugalware.org/wip_kernel/raw/9d0e99ff5fef596388913549a8418c07d367a940/source/base/kernel/fix_virtualbox.patch
+Source302:	https://www.virtualbox.org/raw-attachment/ticket/19644/fixes_for_mm_struct.patch
+Source303:	https://www.virtualbox.org/raw-attachment/ticket/19644/fixes_for_changes_in_cpu_tlbstate.patch
+Source304:	https://www.virtualbox.org/raw-attachment/ticket/19644/fixes_for_module_memory.patch
 
 # Better support for newer x86 processors
 # Original patch:
@@ -469,12 +473,10 @@ Suggests:	microcode-intel
 # Let's pull in some of the most commonly used DKMS modules
 # so end users don't have to install compilers (and worse,
 # get compiler error messages on failures)
-%if %mdvver >= 3000000
 %ifarch %{x86_64}
 BuildRequires:	virtualbox-kernel-module-sources >= 6.1.10
 BuildRequires:	virtualbox-guest-kernel-module-sources >= 6.1.10
 %endif
-%endif
 
 %description
 %common_desc_kernel
@@ -837,7 +839,7 @@ done
 #
 %prep
 %setup -q -n linux-%{tar_ver} -a 140 -a 200
-cp %{S:6} %{S:7} %{S:8} %{S:9} %{S:10} %{S:11} %{S:12} %{S:13} kernel/configs/
+cp %{S:6} %{S:7} %{S:8} %{S:9} %{S:10} %{S:11} %{S:12} %{S:13} %{S:14} kernel/configs/
 %if 0%{sublevel}
 [ -e .git ] || git init
 xzcat %{SOURCE90} |git apply - || git apply %{SOURCE90}
@@ -851,11 +853,14 @@ patch -p1 -R <%{S:101}
 patch -p1 -R <%{S:100}
 %endif
 
+%if %{with saa716x}
 # merge SAA716x DVB driver from extra tarball
 sed -i -e '/saa7164/isource "drivers/media/pci/saa716x/Kconfig"' drivers/media/pci/Kconfig
 sed -i -e '/saa7164/iobj-$(CONFIG_SAA716X_CORE) += saa716x/' drivers/media/pci/Makefile
 find drivers/media/tuners drivers/media/dvb-frontends -name "*.c" -o -name "*.h" |xargs sed -i -e 's,"dvb_frontend.h",<media/dvb_frontend.h>,g'
+%endif
 
+%if %{with rtl8821ce}
 # Merge RTL8723DE and RTL8821CE drivers
 cd drivers/net/wireless
 sed -i -e '/quantenna\/Kconfig/asource "drivers/net/wireless/rtl8821ce/Kconfig' Kconfig
@@ -863,6 +868,7 @@ sed -i -e '/quantenna\/Kconfig/asource "drivers/net/wireless/rtl8723de/Kconfig'
 sed -i -e '/QUANTENNA/aobj-$(CONFIG_RTL8821CE) += rtl8821ce/' Makefile
 sed -i -e '/QUANTENNA/aobj-$(CONFIG_RTL8723DE) += rtl8723de/' Makefile
 cd -
+%endif
 
 %if %{with build_debug}
 %define debug --debug
@@ -874,7 +880,6 @@ cd -
 LC_ALL=C sed -i -e "s/^SUBLEVEL.*/SUBLEVEL = %{sublevel}/" Makefile
 
 # Pull in some externally maintained modules
-%if %mdvver >= 3000000
 %ifarch %{x86_64}
 # === VirtualBox guest additions ===
 %define use_internal_vboxvideo 0
@@ -933,7 +938,9 @@ sed -i -e 's,\$(KBUILD_EXTMOD),drivers/pci/vboxpci,g' drivers/pci/vboxpci/Makefi
 sed -i -e "s,^KERN_DIR.*,KERN_DIR := $(pwd)," drivers/pci/vboxpci/Makefile*
 echo 'obj-m += vboxpci/' >>drivers/pci/Makefile
 patch -p1 -z .301a~ -b <%{S:301}
-%endif
+patch -p1 -z .302a~ -b <%{S:302}
+patch -p1 -z .303a~ -b <%{S:303}
+patch -p1 -z .304a~ -b <%{S:304}
 %endif
 
 # get rid of unwanted files
@@ -988,20 +995,35 @@ sed -i -e "s/^# CONFIG_RD_ZSTD is not set/CONFIG_RD_ZSTD=y/g" kernel/configs/com
 	x86_64|znver1)
 		CONFIGS=x86_64_defconfig
 		;;
+	ppc64)
+		CONFIGS=pseries_defconfig
+		;;
+	ppc64le)
+		CONFIGS="pseries_defconfig arch/powerpc/configs/le.config"
+		;;
 	*)
 		CONFIGS=defconfig
 		;;
 	esac
 
-	for i in common common-${type} ${arch}-common ${arch}-${type}; do
+	for i in common common-${type}; do
 		[ -e kernel/configs/$i.config ] && CONFIGS="$CONFIGS $i.config"
 	done
 	if [ "$arch" = "znver1" ]; then
-		# We need to build with ARCH=x86_64 rather than ARCH=znver1
-		# and pull in both x86_64 and znver1 configs, with the latter
-		# coming last so it can override the former
-		CONFIGS="${CONFIGS/znver1.config/x86_64.config znver1.config}"
+		# Since znver1 is a special case of x86_64, let's pull
+		# in x86_64 configs first (and znver1 configs on top
+		# later -- later configs overwrite earlier ones)
+		for i in x86_64-common x86_64-${type}; do
+			[ -e kernel/configs/$i.config ] && CONFIGS="$CONFIGS $i.config"
+		done
+	fi
+	for i in ${arch}-common ${arch}-${type}; do
+		[ -e kernel/configs/$i.config ] && CONFIGS="$CONFIGS $i.config"
+	done
+	if [ "$arch" = "znver1" -o "$arch" = "x86_64" ]; then
 		arch=x86
+	elif echo $arch |grep -q ^ppc; then
+		arch=powerpc
 	fi
 
 	make ARCH="${arch}" $CONFIGS
@@ -1148,7 +1170,7 @@ SaveDevel() {
     sed -i -e '/rtl8.*/d' $TempDevelRoot/drivers/net/wireless/{Makefile,Kconfig}
 
     for i in alpha arc avr32 blackfin c6x cris csky frv h8300 hexagon ia64 m32r m68k m68knommu metag microblaze \
-		 mips mn10300 nds32 nios2 openrisc parisc powerpc s390 score sh sparc tile unicore32 xtensa; do
+		 mips mn10300 nds32 nios2 openrisc parisc s390 score sh sparc tile unicore32 xtensa; do
 	rm -rf $TempDevelRoot/arch/$i
     done
 
@@ -1173,6 +1195,7 @@ cat > $kernel_devel_files <<EOF
 $DevelRoot/Documentation
 $DevelRoot/arch/arm
 $DevelRoot/arch/arm64
+$DevelRoot/arch/powerpc
 $DevelRoot/arch/riscv
 $DevelRoot/arch/um
 $DevelRoot/arch/x86
@@ -1403,7 +1426,7 @@ install -d %{temp_root}
 ###
 # Build the configs for every arch we care about
 # that way, we can be sure all *.config files have the right additions
-for a in arm arm64 i386 x86_64 znver1; do
+for a in arm arm64 i386 x86_64 znver1 powerpc; do
 	for t in desktop server; do
 		CreateConfig $a $t
 		export ARCH=$a
@@ -1441,6 +1464,10 @@ for a in arm arm64 i386 x86_64 znver1; do
 				riscv*)
 					SARCH=riscv
 					;;
+				ppc*)
+					ARCH=powerpc
+					SARCH=powerpc
+					;;
 				*)
 					[ "$a" != "$TripletArch" ] && continue
 					;;
@@ -1493,7 +1520,7 @@ chmod +x tools/power/cpupower/utils/version-gen.sh
 %endif
 %endif
 
-%kmake -C tools/lib/bpf CC=clang libbpf.a libbpf.pc libbpf.so.0.0.8
+%kmake -C tools/lib/bpf CC=clang libbpf.a libbpf.pc libbpf.so.0.0.9
 cd tools/bpf/bpftool
 %kmake CC=clang bpftool
 cd -
@@ -1614,7 +1641,7 @@ rm -f %{target_source}/*_files.* %{target_source}/README.kernel-sources
 # we remove all the source files that we don't ship
 # first architecture files
 for i in alpha arc avr32 blackfin c6x cris csky frv h8300 hexagon ia64 m32r m68k m68knommu metag microblaze \
-    mips nds32 nios2 openrisc parisc powerpc s390 score sh sh64 sparc tile unicore32 v850 xtensa mn10300; do
+    mips nds32 nios2 openrisc parisc s390 score sh sh64 sparc tile unicore32 v850 xtensa mn10300; do
     rm -rf %{target_source}/arch/$i
 done
 
@@ -1625,6 +1652,7 @@ rm -rf %{target_source}/.tmp_depmod/
 rm -rf %{buildroot}/usr/src/linux-*/uksm.txt
 
 # more cleaning
+rm -f %{target_source}/arch/x86_64/boot/bzImage
 cd %{target_source}
 # lots of gitignore files
 find -iname ".gitignore" -delete
@@ -1661,6 +1689,7 @@ cd -
 %{_kerneldir}/arch/Kconfig
 %{_kerneldir}/arch/arm
 %{_kerneldir}/arch/arm64
+%{_kerneldir}/arch/powerpc
 %{_kerneldir}/arch/riscv
 %{_kerneldir}/arch/um
 %{_kerneldir}/arch/x86
diff --git a/linux-5.8.tar.sign b/linux-5.8.tar.sign
new file mode 100644
index 0000000..c243088
--- /dev/null
+++ b/linux-5.8.tar.sign
@@ -0,0 +1,19 @@
+-----BEGIN PGP SIGNATURE-----
+Comment: This signature is for the .tar version of the archive
+Comment: git archive --format tar --prefix=linux-5.8/ v5.8
+Comment: git version 2.28.0
+
+iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAl8nkEEACgkQONu9yGCS
+aT5S2g/+Ndv+CYgXJA6pPq4OCPek7uLGS8ACKhOZLmCqGWeYHHI4cFqMG8ly8Q4h
+C41SbQ02IqHTYZO8MCbrEXSO7CygxWofneTFWA7VByG4pELpC8CPg9+Vd0+9beCE
+uE+iBDnVfQi02Rd4LJZhZms1U1ZXFvfoHzAHwwc3v9ocxu3e2BVmE/cdI0AkxJv1
+xwh6wh+ppI/yUdrru2iQdN+hQ+uLrxE5HJezS/cTuI8RNop0CvwxIodfXn5o9xab
+av+CI/V6CpUVXztipTTsL2xpkI6O67jFdpgYwIylO1wnyyzi4uGGQLQ16C8aiCX2
+omaF4DpKyshkJKfiChEs6f9+9lmc1eMzALY54LgczZP3Qr6vEOPEuFc2hw69hndI
+lNIB5Kw3N3Tm06tl1QaW4KypulUzXueKV0K8GD5r9+z+R+r2z9DWadNLTslACBnI
+dgg4KlNrMoY8/reguTSv02rdOLWKJI5UV/8OO3urjs11OtYs1uW8p8h8des73k7Z
+SfLj/T/bcDzymN5G8prLM8WIvihxc/nQHGmICWNRqoFns6EEvprZeA9RWFBvn3x3
+8e10uR18P+rBiaIyxn4IzLxxKUP7SvKhWdIxySwaEZZhDo5KxOpLgQEKQCZgFXXQ
+/IqseyAItaAFoI5UxVopUUPQzzZLveLraAweB9aOfiBAshIZWGI=
+=pEA9
+-----END PGP SIGNATURE-----
diff --git a/nvme-pci-more-info.patch b/nvme-pci-more-info.patch
index d4c88dc..959646b 100644
--- a/nvme-pci-more-info.patch
+++ b/nvme-pci-more-info.patch
@@ -1,8 +1,8 @@
-diff -Naur linux-5.3.1/drivers/nvme/host/pci.c linux-5.3.1-p/drivers/nvme/host/pci.c
---- linux-5.3.1/drivers/nvme/host/pci.c	2019-09-21 07:19:47.000000000 +0200
-+++ linux-5.3.1-p/drivers/nvme/host/pci.c	2019-09-27 14:46:38.567818352 +0200
-@@ -2524,8 +2524,11 @@
- 	}
+diff -up linux-5.8-rc1/drivers/nvme/host/pci.c.36~ linux-5.8-rc1/drivers/nvme/host/pci.c
+--- linux-5.8-rc1/drivers/nvme/host/pci.c.36~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/drivers/nvme/host/pci.c	2020-06-16 22:07:57.763065097 +0200
+@@ -2580,8 +2580,11 @@ static void nvme_reset_work(struct work_
+ 	dev->ctrl.max_integrity_segments = 1;
  
  	result = nvme_init_identify(&dev->ctrl);
 -	if (result)
@@ -14,7 +14,7 @@ diff -Naur linux-5.3.1/drivers/nvme/host/pci.c linux-5.3.1-p/drivers/nvme/host/p
  
  	if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
  		if (!dev->ctrl.opal_dev)
-@@ -2547,13 +2550,19 @@
+@@ -2603,13 +2606,19 @@ static void nvme_reset_work(struct work_
  
  	if (dev->ctrl.hmpre) {
  		result = nvme_setup_host_mem(dev);
diff --git a/powerpc-common.config b/powerpc-common.config
new file mode 100644
index 0000000..0fe6406
--- /dev/null
+++ b/powerpc-common.config
@@ -0,0 +1 @@
+CONFIG_PPC64=y
diff --git a/support-kernel-and-ramfs-comp-and-decomp-with-zstd.patch b/support-kernel-and-ramfs-comp-and-decomp-with-zstd.patch
index 28141f1..82bff5b 100644
--- a/support-kernel-and-ramfs-comp-and-decomp-with-zstd.patch
+++ b/support-kernel-and-ramfs-comp-and-decomp-with-zstd.patch
@@ -1,7 +1,7 @@
-diff -Naur linux-5.6.1/arch/x86/boot/compressed/Makefile linux-5.6.1-p/arch/x86/boot/compressed/Makefile
---- linux-5.6.1/arch/x86/boot/compressed/Makefile	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/arch/x86/boot/compressed/Makefile	2020-04-01 13:29:07.958649103 +0200
-@@ -24,7 +24,7 @@
+diff -up linux-5.8-rc1/arch/x86/boot/compressed/Makefile.8~ linux-5.8-rc1/arch/x86/boot/compressed/Makefile
+--- linux-5.8-rc1/arch/x86/boot/compressed/Makefile.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/boot/compressed/Makefile	2020-06-16 21:51:35.330168560 +0200
+@@ -26,7 +26,7 @@ OBJECT_FILES_NON_STANDARD	:= y
  KCOV_INSTRUMENT		:= n
  
  targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
@@ -10,7 +10,7 @@ diff -Naur linux-5.6.1/arch/x86/boot/compressed/Makefile linux-5.6.1-p/arch/x86/
  
  KBUILD_CFLAGS := -m$(BITS) -O2
  KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
-@@ -145,6 +145,8 @@
+@@ -145,6 +145,8 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.al
  	$(call if_changed,lzo)
  $(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
  	$(call if_changed,lz4)
@@ -19,7 +19,7 @@ diff -Naur linux-5.6.1/arch/x86/boot/compressed/Makefile linux-5.6.1-p/arch/x86/
  
  suffix-$(CONFIG_KERNEL_GZIP)	:= gz
  suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
-@@ -152,6 +154,7 @@
+@@ -152,6 +154,7 @@ suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
  suffix-$(CONFIG_KERNEL_XZ)	:= xz
  suffix-$(CONFIG_KERNEL_LZO) 	:= lzo
  suffix-$(CONFIG_KERNEL_LZ4) 	:= lz4
@@ -27,10 +27,10 @@ diff -Naur linux-5.6.1/arch/x86/boot/compressed/Makefile linux-5.6.1-p/arch/x86/
  
  quiet_cmd_mkpiggy = MKPIGGY $@
        cmd_mkpiggy = $(obj)/mkpiggy $< > $@
-diff -Naur linux-5.6.1/arch/x86/boot/compressed/misc.c linux-5.6.1-p/arch/x86/boot/compressed/misc.c
---- linux-5.6.1/arch/x86/boot/compressed/misc.c	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/arch/x86/boot/compressed/misc.c	2020-04-01 13:29:07.958649103 +0200
-@@ -77,6 +77,10 @@
+diff -up linux-5.8-rc1/arch/x86/boot/compressed/misc.c.8~ linux-5.8-rc1/arch/x86/boot/compressed/misc.c
+--- linux-5.8-rc1/arch/x86/boot/compressed/misc.c.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/boot/compressed/misc.c	2020-06-16 21:51:35.330168560 +0200
+@@ -77,6 +77,10 @@ static int lines, cols;
  #ifdef CONFIG_KERNEL_LZ4
  #include "../../../../lib/decompress_unlz4.c"
  #endif
@@ -41,10 +41,10 @@ diff -Naur linux-5.6.1/arch/x86/boot/compressed/misc.c linux-5.6.1-p/arch/x86/bo
  /*
   * NOTE: When adding a new decompressor, please update the analysis in
   * ../header.S.
-diff -Naur linux-5.6.1/arch/x86/boot/header.S linux-5.6.1-p/arch/x86/boot/header.S
---- linux-5.6.1/arch/x86/boot/header.S	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/arch/x86/boot/header.S	2020-04-01 13:29:03.958623285 +0200
-@@ -536,8 +536,14 @@
+diff -up linux-5.8-rc1/arch/x86/boot/header.S.8~ linux-5.8-rc1/arch/x86/boot/header.S
+--- linux-5.8-rc1/arch/x86/boot/header.S.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/boot/header.S	2020-06-16 21:51:35.330168560 +0200
+@@ -539,8 +539,14 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR
  # the size-dependent part now grows so fast.
  #
  # extra_bytes = (uncompressed_size >> 8) + 65536
@@ -60,9 +60,9 @@ diff -Naur linux-5.6.1/arch/x86/boot/header.S linux-5.6.1-p/arch/x86/boot/header
  #if ZO_z_output_len > ZO_z_input_len
  # define ZO_z_extract_offset	(ZO_z_output_len + ZO_z_extra_bytes - \
  				 ZO_z_input_len)
-diff -Naur linux-5.6.1/arch/x86/include/asm/boot.h linux-5.6.1-p/arch/x86/include/asm/boot.h
---- linux-5.6.1/arch/x86/include/asm/boot.h	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/arch/x86/include/asm/boot.h	2020-04-01 13:29:07.958649103 +0200
+diff -up linux-5.8-rc1/arch/x86/include/asm/boot.h.8~ linux-5.8-rc1/arch/x86/include/asm/boot.h
+--- linux-5.8-rc1/arch/x86/include/asm/boot.h.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/include/asm/boot.h	2020-06-16 21:51:35.330168560 +0200
 @@ -24,9 +24,11 @@
  # error "Invalid value for CONFIG_PHYSICAL_ALIGN"
  #endif
@@ -77,10 +77,10 @@ diff -Naur linux-5.6.1/arch/x86/include/asm/boot.h linux-5.6.1-p/arch/x86/includ
  # define BOOT_HEAP_SIZE		 0x10000
  #endif
  
-diff -Naur linux-5.6.1/arch/x86/Kconfig linux-5.6.1-p/arch/x86/Kconfig
---- linux-5.6.1/arch/x86/Kconfig	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/arch/x86/Kconfig	2020-04-01 13:29:07.958649103 +0200
-@@ -183,6 +183,7 @@
+diff -up linux-5.8-rc1/arch/x86/Kconfig.8~ linux-5.8-rc1/arch/x86/Kconfig
+--- linux-5.8-rc1/arch/x86/Kconfig.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/arch/x86/Kconfig	2020-06-16 21:51:35.331168570 +0200
+@@ -188,6 +188,7 @@ config X86
  	select HAVE_KERNEL_LZMA
  	select HAVE_KERNEL_LZO
  	select HAVE_KERNEL_XZ
@@ -88,10 +88,10 @@ diff -Naur linux-5.6.1/arch/x86/Kconfig linux-5.6.1-p/arch/x86/Kconfig
  	select HAVE_KPROBES
  	select HAVE_KPROBES_ON_FTRACE
  	select HAVE_FUNCTION_ERROR_INJECTION
-diff -Naur linux-5.6.1/Documentation/x86/boot.rst linux-5.6.1-p/Documentation/x86/boot.rst
---- linux-5.6.1/Documentation/x86/boot.rst	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/Documentation/x86/boot.rst	2020-04-01 13:29:07.958649103 +0200
-@@ -786,9 +786,9 @@
+diff -up linux-5.8-rc1/Documentation/x86/boot.rst.8~ linux-5.8-rc1/Documentation/x86/boot.rst
+--- linux-5.8-rc1/Documentation/x86/boot.rst.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/Documentation/x86/boot.rst	2020-06-16 21:51:35.331168570 +0200
+@@ -782,9 +782,9 @@ Protocol:	2.08+
    uncompressed data should be determined using the standard magic
    numbers.  The currently supported compression formats are gzip
    (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A), LZMA
@@ -104,10 +104,10 @@ diff -Naur linux-5.6.1/Documentation/x86/boot.rst linux-5.6.1-p/Documentation/x8
  
  ============	==============
  Field name:	payload_length
-diff -Naur linux-5.6.1/.gitignore linux-5.6.1-p/.gitignore
---- linux-5.6.1/.gitignore	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/.gitignore	2020-04-01 13:29:11.158669756 +0200
-@@ -43,6 +43,7 @@
+diff -up linux-5.8-rc1/.gitignore.8~ linux-5.8-rc1/.gitignore
+--- linux-5.8-rc1/.gitignore.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/.gitignore	2020-06-16 21:51:35.331168570 +0200
+@@ -44,6 +44,7 @@
  *.tab.[ch]
  *.tar
  *.xz
@@ -115,9 +115,9 @@ diff -Naur linux-5.6.1/.gitignore linux-5.6.1-p/.gitignore
  Module.symvers
  modules.builtin
  modules.order
-diff -Naur linux-5.6.1/include/linux/decompress/unzstd.h linux-5.6.1-p/include/linux/decompress/unzstd.h
---- linux-5.6.1/include/linux/decompress/unzstd.h	1970-01-01 01:00:00.000000000 +0100
-+++ linux-5.6.1-p/include/linux/decompress/unzstd.h	2020-04-01 13:28:53.998558979 +0200
+diff -up linux-5.8-rc1/include/linux/decompress/unzstd.h.8~ linux-5.8-rc1/include/linux/decompress/unzstd.h
+--- linux-5.8-rc1/include/linux/decompress/unzstd.h.8~	2020-06-16 21:51:35.331168570 +0200
++++ linux-5.8-rc1/include/linux/decompress/unzstd.h	2020-06-16 21:51:35.331168570 +0200
 @@ -0,0 +1,11 @@
 +/* SPDX-License-Identifier: GPL-2.0 */
 +#ifndef LINUX_DECOMPRESS_UNZSTD_H
@@ -130,10 +130,10 @@ diff -Naur linux-5.6.1/include/linux/decompress/unzstd.h linux-5.6.1-p/include/l
 +	   long *pos,
 +	   void (*error_fn)(char *x));
 +#endif
-diff -Naur linux-5.6.1/init/Kconfig linux-5.6.1-p/init/Kconfig
---- linux-5.6.1/init/Kconfig	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/init/Kconfig	2020-04-01 13:28:58.038585064 +0200
-@@ -173,13 +173,16 @@
+diff -up linux-5.8-rc1/init/Kconfig.8~ linux-5.8-rc1/init/Kconfig
+--- linux-5.8-rc1/init/Kconfig.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/init/Kconfig	2020-06-16 21:51:35.332168580 +0200
+@@ -191,13 +191,16 @@ config HAVE_KERNEL_LZO
  config HAVE_KERNEL_LZ4
  	bool
  
@@ -151,7 +151,7 @@ diff -Naur linux-5.6.1/init/Kconfig linux-5.6.1-p/init/Kconfig
  	help
  	  The linux kernel is a kind of self-extracting executable.
  	  Several compression algorithms are available, which differ
-@@ -258,6 +261,16 @@
+@@ -276,6 +279,16 @@ config KERNEL_LZ4
  	  is about 8% bigger than LZO. But the decompression speed is
  	  faster than LZO.
  
@@ -168,9 +168,9 @@ diff -Naur linux-5.6.1/init/Kconfig linux-5.6.1-p/init/Kconfig
  config KERNEL_UNCOMPRESSED
  	bool "None"
  	depends on HAVE_KERNEL_UNCOMPRESSED
-diff -Naur linux-5.6.1/lib/decompress.c linux-5.6.1-p/lib/decompress.c
---- linux-5.6.1/lib/decompress.c	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/lib/decompress.c	2020-04-01 13:28:53.998558979 +0200
+diff -up linux-5.8-rc1/lib/decompress.c.8~ linux-5.8-rc1/lib/decompress.c
+--- linux-5.8-rc1/lib/decompress.c.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/lib/decompress.c	2020-06-16 21:51:35.332168580 +0200
 @@ -13,6 +13,7 @@
  #include <linux/decompress/inflate.h>
  #include <linux/decompress/unlzo.h>
@@ -189,7 +189,7 @@ diff -Naur linux-5.6.1/lib/decompress.c linux-5.6.1-p/lib/decompress.c
  
  struct compress_format {
  	unsigned char magic[2];
-@@ -52,6 +56,7 @@
+@@ -52,6 +56,7 @@ static const struct compress_format comp
  	{ {0xfd, 0x37}, "xz", unxz },
  	{ {0x89, 0x4c}, "lzo", unlzo },
  	{ {0x02, 0x21}, "lz4", unlz4 },
@@ -197,9 +197,9 @@ diff -Naur linux-5.6.1/lib/decompress.c linux-5.6.1-p/lib/decompress.c
  	{ {0, 0}, NULL, NULL }
  };
  
-diff -Naur linux-5.6.1/lib/decompress_unzstd.c linux-5.6.1-p/lib/decompress_unzstd.c
---- linux-5.6.1/lib/decompress_unzstd.c	1970-01-01 01:00:00.000000000 +0100
-+++ linux-5.6.1-p/lib/decompress_unzstd.c	2020-04-01 13:28:53.998558979 +0200
+diff -up linux-5.8-rc1/lib/decompress_unzstd.c.8~ linux-5.8-rc1/lib/decompress_unzstd.c
+--- linux-5.8-rc1/lib/decompress_unzstd.c.8~	2020-06-16 21:51:35.332168580 +0200
++++ linux-5.8-rc1/lib/decompress_unzstd.c	2020-06-16 21:51:35.332168580 +0200
 @@ -0,0 +1,342 @@
 +// SPDX-License-Identifier: GPL-2.0
 +
@@ -543,10 +543,10 @@ diff -Naur linux-5.6.1/lib/decompress_unzstd.c linux-5.6.1-p/lib/decompress_unzs
 +	return __unzstd(buf, len, fill, flush, out_buf, out_len, pos, error);
 +}
 +#endif
-diff -Naur linux-5.6.1/lib/Kconfig linux-5.6.1-p/lib/Kconfig
---- linux-5.6.1/lib/Kconfig	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/lib/Kconfig	2020-04-01 13:28:53.998558979 +0200
-@@ -336,6 +336,10 @@
+diff -up linux-5.8-rc1/lib/Kconfig.8~ linux-5.8-rc1/lib/Kconfig
+--- linux-5.8-rc1/lib/Kconfig.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/lib/Kconfig	2020-06-16 21:51:35.332168580 +0200
+@@ -342,6 +342,10 @@ config DECOMPRESS_LZ4
  	select LZ4_DECOMPRESS
  	tristate
  
@@ -557,10 +557,10 @@ diff -Naur linux-5.6.1/lib/Kconfig linux-5.6.1-p/lib/Kconfig
  #
  # Generic allocator support is selected if needed
  #
-diff -Naur linux-5.6.1/lib/Makefile linux-5.6.1-p/lib/Makefile
---- linux-5.6.1/lib/Makefile	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/lib/Makefile	2020-04-01 13:28:53.998558979 +0200
-@@ -160,6 +160,7 @@
+diff -up linux-5.8-rc1/lib/Makefile.8~ linux-5.8-rc1/lib/Makefile
+--- linux-5.8-rc1/lib/Makefile.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/lib/Makefile	2020-06-16 21:51:35.332168580 +0200
+@@ -170,6 +170,7 @@ lib-$(CONFIG_DECOMPRESS_LZMA) += decompr
  lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o
  lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
  lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o
@@ -568,10 +568,10 @@ diff -Naur linux-5.6.1/lib/Makefile linux-5.6.1-p/lib/Makefile
  
  obj-$(CONFIG_TEXTSEARCH) += textsearch.o
  obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
-diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
---- linux-5.6.1/lib/xxhash.c	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/lib/xxhash.c	2020-04-01 13:28:51.078540121 +0200
-@@ -80,13 +80,11 @@
+diff -up linux-5.8-rc1/lib/xxhash.c.8~ linux-5.8-rc1/lib/xxhash.c
+--- linux-5.8-rc1/lib/xxhash.c.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/lib/xxhash.c	2020-06-16 21:51:35.333168590 +0200
+@@ -80,13 +80,11 @@ void xxh32_copy_state(struct xxh32_state
  {
  	memcpy(dst, src, sizeof(*dst));
  }
@@ -585,7 +585,7 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  
  /*-***************************
   * Simple Hash Functions
-@@ -151,7 +149,6 @@
+@@ -151,7 +149,6 @@ uint32_t xxh32(const void *input, const
  
  	return h32;
  }
@@ -593,7 +593,7 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  
  static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
  {
-@@ -234,7 +231,6 @@
+@@ -234,7 +231,6 @@ uint64_t xxh64(const void *input, const
  
  	return h64;
  }
@@ -601,7 +601,7 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  
  /*-**************************************************
   * Advanced Hash Functions
-@@ -251,7 +247,6 @@
+@@ -251,7 +247,6 @@ void xxh32_reset(struct xxh32_state *sta
  	state.v4 = seed - PRIME32_1;
  	memcpy(statePtr, &state, sizeof(state));
  }
@@ -609,7 +609,7 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  
  void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
  {
-@@ -265,7 +260,6 @@
+@@ -265,7 +260,6 @@ void xxh64_reset(struct xxh64_state *sta
  	state.v4 = seed - PRIME64_1;
  	memcpy(statePtr, &state, sizeof(state));
  }
@@ -617,7 +617,7 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  
  int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
  {
-@@ -334,7 +328,6 @@
+@@ -334,7 +328,6 @@ int xxh32_update(struct xxh32_state *sta
  
  	return 0;
  }
@@ -625,7 +625,7 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  
  uint32_t xxh32_digest(const struct xxh32_state *state)
  {
-@@ -372,7 +365,6 @@
+@@ -372,7 +365,6 @@ uint32_t xxh32_digest(const struct xxh32
  
  	return h32;
  }
@@ -633,7 +633,7 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  
  int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
  {
-@@ -439,7 +431,6 @@
+@@ -439,7 +431,6 @@ int xxh64_update(struct xxh64_state *sta
  
  	return 0;
  }
@@ -641,7 +641,7 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  
  uint64_t xxh64_digest(const struct xxh64_state *state)
  {
-@@ -494,7 +485,19 @@
+@@ -494,7 +485,19 @@ uint64_t xxh64_digest(const struct xxh64
  
  	return h64;
  }
@@ -661,10 +661,10 @@ diff -Naur linux-5.6.1/lib/xxhash.c linux-5.6.1-p/lib/xxhash.c
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_DESCRIPTION("xxHash");
 +#endif
-diff -Naur linux-5.6.1/lib/zstd/decompress.c linux-5.6.1-p/lib/zstd/decompress.c
---- linux-5.6.1/lib/zstd/decompress.c	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/lib/zstd/decompress.c	2020-04-01 13:28:46.908513191 +0200
-@@ -2490,6 +2490,7 @@
+diff -up linux-5.8-rc1/lib/zstd/decompress.c.8~ linux-5.8-rc1/lib/zstd/decompress.c
+--- linux-5.8-rc1/lib/zstd/decompress.c.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/lib/zstd/decompress.c	2020-06-16 21:51:35.333168590 +0200
+@@ -2490,6 +2490,7 @@ size_t ZSTD_decompressStream(ZSTD_DStrea
  	}
  }
  
@@ -672,14 +672,14 @@ diff -Naur linux-5.6.1/lib/zstd/decompress.c linux-5.6.1-p/lib/zstd/decompress.c
  EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound);
  EXPORT_SYMBOL(ZSTD_initDCtx);
  EXPORT_SYMBOL(ZSTD_decompressDCtx);
-@@ -2529,3 +2530,4 @@
+@@ -2529,3 +2530,4 @@ EXPORT_SYMBOL(ZSTD_insertBlock);
  
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_DESCRIPTION("Zstd Decompressor");
 +#endif
-diff -Naur linux-5.6.1/lib/zstd/fse_decompress.c linux-5.6.1-p/lib/zstd/fse_decompress.c
---- linux-5.6.1/lib/zstd/fse_decompress.c	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/lib/zstd/fse_decompress.c	2020-04-01 13:28:46.908513191 +0200
+diff -up linux-5.8-rc1/lib/zstd/fse_decompress.c.8~ linux-5.8-rc1/lib/zstd/fse_decompress.c
+--- linux-5.8-rc1/lib/zstd/fse_decompress.c.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/lib/zstd/fse_decompress.c	2020-06-16 21:51:35.333168590 +0200
 @@ -47,6 +47,7 @@
  ****************************************************************/
  #include "bitstream.h"
@@ -703,10 +703,10 @@ diff -Naur linux-5.6.1/lib/zstd/fse_decompress.c linux-5.6.1-p/lib/zstd/fse_deco
  /* **************************************************************
  *  Templates
  ****************************************************************/
-diff -Naur linux-5.6.1/lib/zstd/zstd_internal.h linux-5.6.1-p/lib/zstd/zstd_internal.h
---- linux-5.6.1/lib/zstd/zstd_internal.h	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/lib/zstd/zstd_internal.h	2020-04-01 13:28:46.908513191 +0200
-@@ -127,7 +127,14 @@
+diff -up linux-5.8-rc1/lib/zstd/zstd_internal.h.8~ linux-5.8-rc1/lib/zstd/zstd_internal.h
+--- linux-5.8-rc1/lib/zstd/zstd_internal.h.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/lib/zstd/zstd_internal.h	2020-06-16 21:51:35.333168590 +0200
+@@ -127,7 +127,14 @@ static const U32 OF_defaultNormLog = OF_
  *  Shared functions to include for inlining
  *********************************************/
  ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
@@ -722,7 +722,7 @@ diff -Naur linux-5.6.1/lib/zstd/zstd_internal.h linux-5.6.1-p/lib/zstd/zstd_inte
  }
  /*! ZSTD_wildcopy() :
  *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
-@@ -137,13 +144,16 @@
+@@ -137,13 +144,16 @@ ZSTD_STATIC void ZSTD_wildcopy(void *dst
  	const BYTE* ip = (const BYTE*)src;
  	BYTE* op = (BYTE*)dst;
  	BYTE* const oend = op + length;
@@ -740,12 +740,12 @@ diff -Naur linux-5.6.1/lib/zstd/zstd_internal.h linux-5.6.1-p/lib/zstd/zstd_inte
  	do {
  		ZSTD_copy8(op, ip);
  		op += 8;
-diff -Naur linux-5.6.1/scripts/Makefile.lib linux-5.6.1-p/scripts/Makefile.lib
---- linux-5.6.1/scripts/Makefile.lib	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/scripts/Makefile.lib	2020-04-01 13:28:58.038585064 +0200
-@@ -394,6 +394,21 @@
+diff -up linux-5.8-rc1/scripts/Makefile.lib.8~ linux-5.8-rc1/scripts/Makefile.lib
+--- linux-5.8-rc1/scripts/Makefile.lib.8~	2020-06-16 21:51:35.333168590 +0200
++++ linux-5.8-rc1/scripts/Makefile.lib	2020-06-16 21:52:10.151514756 +0200
+@@ -408,6 +408,21 @@ quiet_cmd_xzkern = XZKERN  $@
  quiet_cmd_xzmisc = XZMISC  $@
-       cmd_xzmisc = cat $(real-prereqs) | xz --check=crc32 --lzma2=dict=1MiB > $@
+       cmd_xzmisc = cat $(real-prereqs) | $(XZ) --check=crc32 --lzma2=dict=1MiB > $@
  
 +# ZSTD
 +# ---------------------------------------------------------------------------
@@ -765,10 +765,10 @@ diff -Naur linux-5.6.1/scripts/Makefile.lib linux-5.6.1-p/scripts/Makefile.lib
  # ASM offsets
  # ---------------------------------------------------------------------------
  
-diff -Naur linux-5.6.1/usr/Kconfig linux-5.6.1-p/usr/Kconfig
---- linux-5.6.1/usr/Kconfig	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/usr/Kconfig	2020-04-01 13:29:00.978604046 +0200
-@@ -100,6 +100,15 @@
+diff -up linux-5.8-rc1/usr/Kconfig.8~ linux-5.8-rc1/usr/Kconfig
+--- linux-5.8-rc1/usr/Kconfig.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/usr/Kconfig	2020-06-16 21:51:35.334168600 +0200
+@@ -100,6 +100,15 @@ config RD_LZ4
  	  Support loading of a LZ4 encoded initial ramdisk or cpio buffer
  	  If unsure, say N.
  
@@ -784,7 +784,7 @@ diff -Naur linux-5.6.1/usr/Kconfig linux-5.6.1-p/usr/Kconfig
  choice
  	prompt "Built-in initramfs compression mode"
  	depends on INITRAMFS_SOURCE != ""
-@@ -207,4 +216,15 @@
+@@ -207,4 +216,15 @@ config INITRAMFS_COMPRESSION_NONE
  	  short time at boot, while both the cpio image and the unpacked
  	  filesystem image will be present in memory simultaneously
  
@@ -800,10 +800,10 @@ diff -Naur linux-5.6.1/usr/Kconfig linux-5.6.1-p/usr/Kconfig
 +	  tool to be able to compress the initram.
 +
  endchoice
-diff -Naur linux-5.6.1/usr/Makefile linux-5.6.1-p/usr/Makefile
---- linux-5.6.1/usr/Makefile	2020-04-01 10:58:19.000000000 +0200
-+++ linux-5.6.1-p/usr/Makefile	2020-04-01 13:29:00.978604046 +0200
-@@ -15,6 +15,7 @@
+diff -up linux-5.8-rc1/usr/Makefile.8~ linux-5.8-rc1/usr/Makefile
+--- linux-5.8-rc1/usr/Makefile.8~	2020-06-14 21:45:04.000000000 +0200
++++ linux-5.8-rc1/usr/Makefile	2020-06-16 21:51:35.334168600 +0200
+@@ -15,6 +15,7 @@ compress-$(CONFIG_INITRAMFS_COMPRESSION_
  compress-$(CONFIG_INITRAMFS_COMPRESSION_XZ)	:= xzmisc
  compress-$(CONFIG_INITRAMFS_COMPRESSION_LZO)	:= lzo
  compress-$(CONFIG_INITRAMFS_COMPRESSION_LZ4)	:= lz4
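The patch above backports zstd compression for the kernel image and the initramfs to 5.8 (the feature landed upstream in 5.9). The exact cmd_zstd rule added to scripts/Makefile.lib is elided in this diff, so the following is only an illustrative sketch of enabling the new options for an x86_64 build:

    cd linux-5.8
    # turn on the symbols this patch introduces; choice entries are settled by olddefconfig
    scripts/config -e KERNEL_ZSTD -e RD_ZSTD -e INITRAMFS_COMPRESSION_ZSTD
    make olddefconfig
    make -j"$(nproc)" bzImage    # the compressed vmlinux is now piped through zstd
    # an external initramfs can be compressed the same way userspace tools usually do it
    find . | cpio -o -H newc | zstd -19 > ../initramfs.img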
diff --git a/uksm-5.8.patch b/uksm-5.8.patch
new file mode 100644
index 0000000..4bd1d46
--- /dev/null
+++ b/uksm-5.8.patch
@@ -0,0 +1,6934 @@
+diff --git a/Documentation/vm/uksm.txt b/Documentation/vm/uksm.txt
+new file mode 100644
+index 000000000000..be19a3127001
+--- /dev/null
++++ b/Documentation/vm/uksm.txt
+@@ -0,0 +1,61 @@
++The Ultra Kernel Samepage Merging feature
++----------------------------------------------
++/*
++ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
++ *
++ * This is an improvement upon KSM. Some basic data structures and routines
++ * are borrowed from ksm.c .
++ *
++ * Its new features:
++ * 1. Full system scan:
++ *      It automatically scans all user processes' anonymous VMAs. Kernel-user
++ *      interaction to submit a memory area to KSM is no longer needed.
++ *
++ * 2. Rich area detection:
++ *      It automatically detects rich areas containing abundant duplicated
++ *      pages. Rich areas are given a full scan speed. Poor areas are
++ *      sampled at a reasonable speed with very low CPU consumption.
++ *
++ * 3. Ultra Per-page scan speed improvement:
++ *      A new hash algorithm is proposed. As a result, on a machine with
++ *      Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it
++ *      can scan memory areas that do not contain duplicated pages at a speed of
++ *      627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of
++ *      477MB/sec ~ 923MB/sec.
++ *
++ * 4. Thrashing area avoidance:
++ *      A thrashing area (a VMA that has frequent KSM page break-outs) can be
++ *      filtered out. My benchmark shows it's more efficient than KSM's per-page
++ *      hash value based volatile page detection.
++ *
++ *
++ * 5. Misc changes upon KSM:
++ *      * It has a fully x86-optimized memcmp dedicated for 4-byte-aligned page
++ *        comparison. It's much faster than default C version on x86.
++ *      * rmap_item now has a struct page * member to loosely cache an
++ *        address-->page mapping, which avoids many time-costly
++ *        follow_page() calls.
++ *      * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
++ *      * try_to_merge_two_pages() now can revert a pte if it fails. No break_
++ *        ksm is needed for this case.
++ *
++ * 6. Full zero page consideration (contributed by Figo Zhang)
++ *    Now uksmd considers full zero pages as special pages and merges them into a
++ *    special unswappable UKSM zero page.
++ */
++
++ChangeLog:
++
++2012-05-05 The creation of this Doc
++2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up.
++2012-05-28 UKSM 0.1.1.2 bug fix release
++2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2
++2012-07-2  UKSM 0.1.2-beta2
++2012-07-10 UKSM 0.1.2-beta3
++2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization.
++2012-10-13 UKSM 0.1.2.1 Bug fixes.
++2012-12-31 UKSM 0.1.2.2 Minor bug fixes.
++2014-07-02 UKSM 0.1.2.3 Fix a " __this_cpu_read() in preemptible bug".
++2015-04-22 UKSM 0.1.2.4 Fix a race condition that can sometimes trigger annoying warnings.
++2016-09-10 UKSM 0.1.2.5 Fix a bug in dedup ratio calculation.
++2017-02-26 UKSM 0.1.2.6 Fix a bug in hugetlbpage handling and a race bug with page migration.
+diff --git a/fs/exec.c b/fs/exec.c
+index e6e8a9a70327..aa7b0ab67afa 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -62,6 +62,7 @@
+ #include <linux/oom.h>
+ #include <linux/compat.h>
+ #include <linux/vmalloc.h>
++#include <linux/ksm.h>
+ 
+ #include <linux/uaccess.h>
+ #include <asm/mmu_context.h>
+diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
+index e9a6841fc25b..2d485b822b5f 100644
+--- a/fs/proc/meminfo.c
++++ b/fs/proc/meminfo.c
+@@ -108,7 +108,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
+ #endif
+ 	show_val_kb(m, "PageTables:     ",
+ 		    global_zone_page_state(NR_PAGETABLE));
+-
++#ifdef CONFIG_UKSM
++	show_val_kb(m, "KsmZeroPages:     ",
++		    global_zone_page_state(NR_UKSM_ZERO_PAGES));
++#endif
+ 	show_val_kb(m, "NFS_Unstable:   ", 0);
+ 	show_val_kb(m, "Bounce:         ",
+ 		    global_zone_page_state(NR_BOUNCE));
+diff --git a/include/linux/ksm.h b/include/linux/ksm.h
+index e48b1e453ff5..8cc8077cebf4 100644
+--- a/include/linux/ksm.h
++++ b/include/linux/ksm.h
+@@ -21,20 +21,16 @@ struct mem_cgroup;
+ #ifdef CONFIG_KSM
+ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+ 		unsigned long end, int advice, unsigned long *vm_flags);
+-int __ksm_enter(struct mm_struct *mm);
+-void __ksm_exit(struct mm_struct *mm);
+ 
+-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
++static inline struct stable_node *page_stable_node(struct page *page)
+ {
+-	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
+-		return __ksm_enter(mm);
+-	return 0;
++	return PageKsm(page) ? page_rmapping(page) : NULL;
+ }
+ 
+-static inline void ksm_exit(struct mm_struct *mm)
++static inline void set_page_stable_node(struct page *page,
++					struct stable_node *stable_node)
+ {
+-	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+-		__ksm_exit(mm);
++	page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
+ }
+ 
+ /*
+@@ -56,6 +52,33 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+ bool reuse_ksm_page(struct page *page,
+ 			struct vm_area_struct *vma, unsigned long address);
+ 
++#ifdef CONFIG_KSM_LEGACY
++int __ksm_enter(struct mm_struct *mm);
++void __ksm_exit(struct mm_struct *mm);
++static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
++{
++	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
++		return __ksm_enter(mm);
++	return 0;
++}
++
++static inline void ksm_exit(struct mm_struct *mm)
++{
++	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
++		__ksm_exit(mm);
++}
++
++#elif defined(CONFIG_UKSM)
++static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
++{
++	return 0;
++}
++
++static inline void ksm_exit(struct mm_struct *mm)
++{
++}
++#endif /* !CONFIG_UKSM */
++
+ #else  /* !CONFIG_KSM */
+ 
+ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+@@ -96,4 +119,6 @@ static inline bool reuse_ksm_page(struct page *page,
+ #endif /* CONFIG_MMU */
+ #endif /* !CONFIG_KSM */
+ 
++#include <linux/uksm.h>
++
+ #endif /* __LINUX_KSM_H */
+diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
+index 64ede5f150dc..f7c58b497e88 100644
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -367,6 +367,9 @@ struct vm_area_struct {
+ 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
+ #endif
+ 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
++#ifdef CONFIG_UKSM
++	struct vma_slot *uksm_vma_slot;
++#endif
+ } __randomize_layout;
+ 
+ struct core_thread {
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index f6f884970511..8ff0d75c2036 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -165,6 +165,9 @@ enum zone_stat_item {
+ 	NR_ZSPAGES,		/* allocated in zsmalloc */
+ #endif
+ 	NR_FREE_CMA_PAGES,
++#ifdef CONFIG_UKSM
++	NR_UKSM_ZERO_PAGES,
++#endif
+ 	NR_VM_ZONE_STAT_ITEMS };
+ 
+ enum node_stat_item {
+diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
+index 56c1e8eb7bb0..757d18225cb0 100644
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -1027,12 +1027,25 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ extern void untrack_pfn_moved(struct vm_area_struct *vma);
+ #endif
+ 
++#ifdef CONFIG_UKSM
++static inline int is_uksm_zero_pfn(unsigned long pfn)
++{
++	extern unsigned long uksm_zero_pfn;
++	return pfn == uksm_zero_pfn;
++}
++#else
++static inline int is_uksm_zero_pfn(unsigned long pfn)
++{
++	return 0;
++}
++#endif
++
+ #ifdef __HAVE_COLOR_ZERO_PAGE
+ static inline int is_zero_pfn(unsigned long pfn)
+ {
+ 	extern unsigned long zero_pfn;
+ 	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+-	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
++	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT) || is_uksm_zero_pfn(pfn);
+ }
+ 
+ #define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
+@@ -1041,7 +1054,7 @@ static inline int is_zero_pfn(unsigned long pfn)
+ static inline int is_zero_pfn(unsigned long pfn)
+ {
+ 	extern unsigned long zero_pfn;
+-	return pfn == zero_pfn;
++	return (pfn == zero_pfn) || (is_uksm_zero_pfn(pfn));
+ }
+ 
+ static inline unsigned long my_zero_pfn(unsigned long addr)
+diff --git a/include/linux/sradix-tree.h b/include/linux/sradix-tree.h
+new file mode 100644
+index 000000000000..d71edba6b63f
+--- /dev/null
++++ b/include/linux/sradix-tree.h
+@@ -0,0 +1,77 @@
++#ifndef _LINUX_SRADIX_TREE_H
++#define _LINUX_SRADIX_TREE_H
++
++
++#define INIT_SRADIX_TREE(root, mask)					\
++do {									\
++	(root)->height = 0;						\
++	(root)->gfp_mask = (mask);					\
++	(root)->rnode = NULL;						\
++} while (0)
++
++#define ULONG_BITS	(sizeof(unsigned long) * 8)
++#define SRADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
++//#define SRADIX_TREE_MAP_SHIFT	6
++//#define SRADIX_TREE_MAP_SIZE	(1UL << SRADIX_TREE_MAP_SHIFT)
++//#define SRADIX_TREE_MAP_MASK	(SRADIX_TREE_MAP_SIZE-1)
++
++struct sradix_tree_node {
++	unsigned int	height;		/* Height from the bottom */
++	unsigned int	count;
++	unsigned int	fulls;		/* Number of full sublevel trees */
++	struct sradix_tree_node *parent;
++	void *stores[0];
++};
++
++/* A simple radix tree implementation */
++struct sradix_tree_root {
++	unsigned int            height;
++	struct sradix_tree_node *rnode;
++
++	/* Where found to have available empty stores in its sublevels */
++	struct sradix_tree_node *enter_node;
++	unsigned int shift;
++	unsigned int stores_size;
++	unsigned int mask;
++	unsigned long min;	/* The first hole index */
++	unsigned long num;
++	//unsigned long *height_to_maxindex;
++
++	/* How the node is allocated and freed. */
++	struct sradix_tree_node *(*alloc)(void);
++	void (*free)(struct sradix_tree_node *node);
++
++	/* When a new node is added and removed */
++	void (*extend)(struct sradix_tree_node *parent, struct sradix_tree_node *child);
++	void (*assign)(struct sradix_tree_node *node, unsigned int index, void *item);
++	void (*rm)(struct sradix_tree_node *node, unsigned int offset);
++};
++
++struct sradix_tree_path {
++	struct sradix_tree_node *node;
++	int offset;
++};
++
++static inline
++void init_sradix_tree_root(struct sradix_tree_root *root, unsigned long shift)
++{
++	root->height = 0;
++	root->rnode = NULL;
++	root->shift = shift;
++	root->stores_size = 1UL << shift;
++	root->mask = root->stores_size - 1;
++}
++
++
++extern void *sradix_tree_next(struct sradix_tree_root *root,
++		       struct sradix_tree_node *node, unsigned long index,
++		       int (*iter)(void *, unsigned long));
++
++extern int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num);
++
++extern void sradix_tree_delete_from_leaf(struct sradix_tree_root *root,
++			struct sradix_tree_node *node, unsigned long index);
++
++extern void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index);
++
++#endif /* _LINUX_SRADIX_TREE_H */
+diff --git a/include/linux/uksm.h b/include/linux/uksm.h
+new file mode 100644
+index 000000000000..bb8651f534f2
+--- /dev/null
++++ b/include/linux/uksm.h
+@@ -0,0 +1,149 @@
++#ifndef __LINUX_UKSM_H
++#define __LINUX_UKSM_H
++/*
++ * Memory merging support.
++ *
++ * This code enables dynamic sharing of identical pages found in different
++ * memory areas, even if they are not shared by fork().
++ */
++
++/* if !CONFIG_UKSM this file should not be compiled at all. */
++#ifdef CONFIG_UKSM
++
++#include <linux/bitops.h>
++#include <linux/mm.h>
++#include <linux/pagemap.h>
++#include <linux/rmap.h>
++#include <linux/sched.h>
++
++extern unsigned long zero_pfn __read_mostly;
++extern unsigned long uksm_zero_pfn __read_mostly;
++extern struct page *empty_uksm_zero_page;
++
++/* must be done before linked to mm */
++extern void uksm_vma_add_new(struct vm_area_struct *vma);
++extern void uksm_remove_vma(struct vm_area_struct *vma);
++
++#define UKSM_SLOT_NEED_SORT	(1 << 0)
++#define UKSM_SLOT_NEED_RERAND	(1 << 1)
++#define UKSM_SLOT_SCANNED	(1 << 2) /* It's scanned in this round */
++#define UKSM_SLOT_FUL_SCANNED	(1 << 3)
++#define UKSM_SLOT_IN_UKSM	(1 << 4)
++
++struct vma_slot {
++	struct sradix_tree_node *snode;
++	unsigned long sindex;
++
++	struct list_head slot_list;
++	unsigned long fully_scanned_round;
++	unsigned long dedup_num;
++	unsigned long pages_scanned;
++	unsigned long this_sampled;
++	unsigned long last_scanned;
++	unsigned long pages_to_scan;
++	struct scan_rung *rung;
++	struct page **rmap_list_pool;
++	unsigned int *pool_counts;
++	unsigned long pool_size;
++	struct vm_area_struct *vma;
++	struct mm_struct *mm;
++	unsigned long ctime_j;
++	unsigned long pages;
++	unsigned long flags;
++	unsigned long pages_cowed; /* pages cowed this round */
++	unsigned long pages_merged; /* pages merged this round */
++	unsigned long pages_bemerged;
++
++	/* when it has page merged in this eval round */
++	struct list_head dedup_list;
++};
++
++static inline void uksm_unmap_zero_page(pte_t pte)
++{
++	if (pte_pfn(pte) == uksm_zero_pfn)
++		__dec_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES);
++}
++
++static inline void uksm_map_zero_page(pte_t pte)
++{
++	if (pte_pfn(pte) == uksm_zero_pfn)
++		__inc_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES);
++}
++
++static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page)
++{
++	if (vma->uksm_vma_slot && PageKsm(page))
++		vma->uksm_vma_slot->pages_cowed++;
++}
++
++static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte)
++{
++	if (vma->uksm_vma_slot && pte_pfn(pte) == uksm_zero_pfn)
++		vma->uksm_vma_slot->pages_cowed++;
++}
++
++static inline int uksm_flags_can_scan(unsigned long vm_flags)
++{
++#ifdef VM_SAO
++		if (vm_flags & VM_SAO)
++			return 0;
++#endif
++
++	return !(vm_flags & (VM_PFNMAP | VM_IO  | VM_DONTEXPAND |
++			     VM_HUGETLB | VM_MIXEDMAP | VM_SHARED
++			     | VM_MAYSHARE | VM_GROWSUP | VM_GROWSDOWN));
++}
++
++static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p)
++{
++	if (uksm_flags_can_scan(*vm_flags_p))
++		*vm_flags_p |= VM_MERGEABLE;
++}
++
++/*
++ * Just a wrapper for BUG_ON for where ksm_zeropage must not be. TODO: it will
++ * be removed when uksm zero page patch is stable enough.
++ */
++static inline void uksm_bugon_zeropage(pte_t pte)
++{
++	BUG_ON(pte_pfn(pte) == uksm_zero_pfn);
++}
++#else
++static inline void uksm_vma_add_new(struct vm_area_struct *vma)
++{
++}
++
++static inline void uksm_remove_vma(struct vm_area_struct *vma)
++{
++}
++
++static inline void uksm_unmap_zero_page(pte_t pte)
++{
++}
++
++static inline void uksm_map_zero_page(pte_t pte)
++{
++}
++
++static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page)
++{
++}
++
++static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte)
++{
++}
++
++static inline int uksm_flags_can_scan(unsigned long vm_flags)
++{
++	return 0;
++}
++
++static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p)
++{
++}
++
++static inline void uksm_bugon_zeropage(pte_t pte)
++{
++}
++#endif /* !CONFIG_UKSM */
++#endif /* __LINUX_UKSM_H */
+diff --git a/kernel/fork.c b/kernel/fork.c
+index efc5493203ae..97bd3a1ca732 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -603,7 +603,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+ 		__vma_link_rb(mm, tmp, rb_link, rb_parent);
+ 		rb_link = &tmp->vm_rb.rb_right;
+ 		rb_parent = &tmp->vm_rb;
+-
++		uksm_vma_add_new(tmp);
+ 		mm->map_count++;
+ 		if (!(tmp->vm_flags & VM_WIPEONFORK))
+ 			retval = copy_page_range(mm, oldmm, mpnt);
+diff --git a/lib/Makefile b/lib/Makefile
+index b1c42c10073b..dbd64025955c 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -29,7 +29,7 @@ endif
+ KCSAN_SANITIZE_random32.o := n
+ 
+ lib-y := ctype.o string.o vsprintf.o cmdline.o \
+-	 rbtree.o radix-tree.o timerqueue.o xarray.o \
++	 rbtree.o radix-tree.o sradix-tree.o timerqueue.o xarray.o \
+ 	 idr.o extable.o sha1.o irq_regs.o argv_split.o \
+ 	 flex_proportions.o ratelimit.o show_mem.o \
+ 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
+diff --git a/lib/sradix-tree.c b/lib/sradix-tree.c
+new file mode 100644
+index 000000000000..ab21e6309b93
+--- /dev/null
++++ b/lib/sradix-tree.c
+@@ -0,0 +1,476 @@
++#include <linux/errno.h>
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <linux/spinlock.h>
++#include <linux/slab.h>
++#include <linux/gcd.h>
++#include <linux/sradix-tree.h>
++
++static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node)
++{
++	return node->fulls == root->stores_size ||
++		(node->height == 1 && node->count == root->stores_size);
++}
++
++/*
++ *	Extend a sradix tree so it can store key @index.
++ */
++static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index)
++{
++	struct sradix_tree_node *node;
++	unsigned int height;
++
++	if (unlikely(root->rnode == NULL)) {
++		if (!(node = root->alloc()))
++			return -ENOMEM;
++
++		node->height = 1;
++		root->rnode = node;
++		root->height = 1;
++	}
++
++	/* Figure out what the height should be.  */
++	height = root->height;
++	index >>= root->shift * height;
++
++	while (index) {
++		index >>= root->shift;
++		height++;
++	}
++
++	while (height > root->height) {
++		unsigned int newheight;
++
++		if (!(node = root->alloc()))
++			return -ENOMEM;
++
++		/* Increase the height.  */
++		node->stores[0] = root->rnode;
++		root->rnode->parent = node;
++		if (root->extend)
++			root->extend(node, root->rnode);
++
++		newheight = root->height + 1;
++		node->height = newheight;
++		node->count = 1;
++		if (sradix_node_full(root, root->rnode))
++			node->fulls = 1;
++
++		root->rnode = node;
++		root->height = newheight;
++	}
++
++	return 0;
++}
++
++/*
++ * Search the next item from the current node, that is not NULL
++ * and can satify root->iter().
++ */
++void *sradix_tree_next(struct sradix_tree_root *root,
++		       struct sradix_tree_node *node, unsigned long index,
++		       int (*iter)(void *item, unsigned long height))
++{
++	unsigned long offset;
++	void *item;
++
++	if (unlikely(node == NULL)) {
++		node = root->rnode;
++		for (offset = 0; offset < root->stores_size; offset++) {
++			item = node->stores[offset];
++			if (item && (!iter || iter(item, node->height)))
++				break;
++		}
++
++		if (unlikely(offset >= root->stores_size))
++			return NULL;
++
++		if (node->height == 1)
++			return item;
++		else
++			goto go_down;
++	}
++
++	while (node) {
++		offset = (index & root->mask) + 1;
++		for (; offset < root->stores_size; offset++) {
++			item = node->stores[offset];
++			if (item && (!iter || iter(item, node->height)))
++				break;
++		}
++
++		if (offset < root->stores_size)
++			break;
++
++		node = node->parent;
++		index >>= root->shift;
++	}
++
++	if (!node)
++		return NULL;
++
++	while (node->height > 1) {
++go_down:
++		node = item;
++		for (offset = 0; offset < root->stores_size; offset++) {
++			item = node->stores[offset];
++			if (item && (!iter || iter(item, node->height)))
++				break;
++		}
++
++		if (unlikely(offset >= root->stores_size))
++			return NULL;
++	}
++
++	BUG_ON(offset > root->stores_size);
++
++	return item;
++}
++
++/*
++ * Blindly insert the item to the tree. Typically, we reuse the
++ * first empty store item.
++ */
++int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num)
++{
++	unsigned long index;
++	unsigned int height;
++	struct sradix_tree_node *node, *tmp = NULL;
++	int offset, offset_saved;
++	void **store = NULL;
++	int error, i, j, shift;
++
++go_on:
++	index = root->min;
++
++	if (root->enter_node && !sradix_node_full(root, root->enter_node)) {
++		node = root->enter_node;
++		BUG_ON((index >> (root->shift * root->height)));
++	} else {
++		node = root->rnode;
++		if (node == NULL || (index >> (root->shift * root->height))
++		    || sradix_node_full(root, node)) {
++			error = sradix_tree_extend(root, index);
++			if (error)
++				return error;
++
++			node = root->rnode;
++		}
++	}
++
++
++	height = node->height;
++	shift = (height - 1) * root->shift;
++	offset = (index >> shift) & root->mask;
++	while (shift > 0) {
++		offset_saved = offset;
++		for (; offset < root->stores_size; offset++) {
++			store = &node->stores[offset];
++			tmp = *store;
++
++			if (!tmp || !sradix_node_full(root, tmp))
++				break;
++		}
++		BUG_ON(offset >= root->stores_size);
++
++		if (offset != offset_saved) {
++			index += (offset - offset_saved) << shift;
++			index &= ~((1UL << shift) - 1);
++		}
++
++		if (!tmp) {
++			if (!(tmp = root->alloc()))
++				return -ENOMEM;
++
++			tmp->height = shift / root->shift;
++			*store = tmp;
++			tmp->parent = node;
++			node->count++;
++//			if (root->extend)
++//				root->extend(node, tmp);
++		}
++
++		node = tmp;
++		shift -= root->shift;
++		offset = (index >> shift) & root->mask;
++	}
++
++	BUG_ON(node->height != 1);
++
++
++	store = &node->stores[offset];
++	for (i = 0, j = 0;
++	      j < root->stores_size - node->count &&
++	      i < root->stores_size - offset && j < num; i++) {
++		if (!store[i]) {
++			store[i] = item[j];
++			if (root->assign)
++				root->assign(node, index + i, item[j]);
++			j++;
++		}
++	}
++
++	node->count += j;
++	root->num += j;
++	num -= j;
++
++	while (sradix_node_full(root, node)) {
++		node = node->parent;
++		if (!node)
++			break;
++
++		node->fulls++;
++	}
++
++	if (unlikely(!node)) {
++		/* All nodes are full */
++		root->min = 1 << (root->height * root->shift);
++		root->enter_node = NULL;
++	} else {
++		root->min = index + i - 1;
++		root->min |= (1UL << (node->height - 1)) - 1;
++		root->min++;
++		root->enter_node = node;
++	}
++
++	if (num) {
++		item += j;
++		goto go_on;
++	}
++
++	return 0;
++}
++
++
++/**
++ *	sradix_tree_shrink    -    shrink height of a sradix tree to minimal
++ *      @root		sradix tree root
++ *
++ */
++static inline void sradix_tree_shrink(struct sradix_tree_root *root)
++{
++	/* try to shrink tree height */
++	while (root->height > 1) {
++		struct sradix_tree_node *to_free = root->rnode;
++
++		/*
++		 * The candidate node has more than one child, or its child
++		 * is not at the leftmost store, we cannot shrink.
++		 */
++		if (to_free->count != 1 || !to_free->stores[0])
++			break;
++
++		root->rnode = to_free->stores[0];
++		root->rnode->parent = NULL;
++		root->height--;
++		if (unlikely(root->enter_node == to_free))
++			root->enter_node = NULL;
++		root->free(to_free);
++	}
++}
++
++/*
++ * Del the item on the known leaf node and index
++ */
++void sradix_tree_delete_from_leaf(struct sradix_tree_root *root,
++				  struct sradix_tree_node *node, unsigned long index)
++{
++	unsigned int offset;
++	struct sradix_tree_node *start, *end;
++
++	BUG_ON(node->height != 1);
++
++	start = node;
++	while (node && !(--node->count))
++		node = node->parent;
++
++	end = node;
++	if (!node) {
++		root->rnode = NULL;
++		root->height = 0;
++		root->min = 0;
++		root->num = 0;
++		root->enter_node = NULL;
++	} else {
++		offset = (index >> (root->shift * (node->height - 1))) & root->mask;
++		if (root->rm)
++			root->rm(node, offset);
++		node->stores[offset] = NULL;
++		root->num--;
++		if (root->min > index) {
++			root->min = index;
++			root->enter_node = node;
++		}
++	}
++
++	if (start != end) {
++		do {
++			node = start;
++			start = start->parent;
++			if (unlikely(root->enter_node == node))
++				root->enter_node = end;
++			root->free(node);
++		} while (start != end);
++
++		/*
++		 * Note that shrink may free "end", so enter_node still need to
++		 * be checked inside.
++		 */
++		sradix_tree_shrink(root);
++	} else if (node->count == root->stores_size - 1) {
++		/* It WAS a full leaf node. Update the ancestors */
++		node = node->parent;
++		while (node) {
++			node->fulls--;
++			if (node->fulls != root->stores_size - 1)
++				break;
++
++			node = node->parent;
++		}
++	}
++}
++
++void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index)
++{
++	unsigned int height, offset;
++	struct sradix_tree_node *node;
++	int shift;
++
++	node = root->rnode;
++	if (node == NULL || (index >> (root->shift * root->height)))
++		return NULL;
++
++	height = root->height;
++	shift = (height - 1) * root->shift;
++
++	do {
++		offset = (index >> shift) & root->mask;
++		node = node->stores[offset];
++		if (!node)
++			return NULL;
++
++		shift -= root->shift;
++	} while (shift >= 0);
++
++	return node;
++}
++
++/*
++ * Return the item if it exists, otherwise create it in place
++ * and return the created item.
++ */
++void *sradix_tree_lookup_create(struct sradix_tree_root *root,
++			unsigned long index, void *(*item_alloc)(void))
++{
++	unsigned int height, offset;
++	struct sradix_tree_node *node, *tmp;
++	void *item;
++	int shift, error;
++
++	if (root->rnode == NULL || (index >> (root->shift * root->height))) {
++		if (item_alloc) {
++			error = sradix_tree_extend(root, index);
++			if (error)
++				return NULL;
++		} else {
++			return NULL;
++		}
++	}
++
++	node = root->rnode;
++	height = root->height;
++	shift = (height - 1) * root->shift;
++
++	do {
++		offset = (index >> shift) & root->mask;
++		if (!node->stores[offset]) {
++			if (!(tmp = root->alloc()))
++				return NULL;
++
++			tmp->height = shift / root->shift;
++			node->stores[offset] = tmp;
++			tmp->parent = node;
++			node->count++;
++			node = tmp;
++		} else {
++			node = node->stores[offset];
++		}
++
++		shift -= root->shift;
++	} while (shift > 0);
++
++	BUG_ON(node->height != 1);
++	offset = index & root->mask;
++	if (node->stores[offset]) {
++		return node->stores[offset];
++	} else if (item_alloc) {
++		if (!(item = item_alloc()))
++			return NULL;
++
++		node->stores[offset] = item;
++
++		/*
++		 * NOTE: we do NOT call root->assign here, since this item is
++		 * newly created by us having no meaning. Caller can call this
++		 * if it's necessary to do so.
++		 */
++
++		node->count++;
++		root->num++;
++
++		while (sradix_node_full(root, node)) {
++			node = node->parent;
++			if (!node)
++				break;
++
++			node->fulls++;
++		}
++
++		if (unlikely(!node)) {
++			/* All nodes are full */
++			root->min = 1 << (root->height * root->shift);
++		} else {
++			if (root->min == index) {
++				root->min |= (1UL << (node->height - 1)) - 1;
++				root->min++;
++				root->enter_node = node;
++			}
++		}
++
++		return item;
++	} else {
++		return NULL;
++	}
++
++}
++
++int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index)
++{
++	unsigned int height, offset;
++	struct sradix_tree_node *node;
++	int shift;
++
++	node = root->rnode;
++	if (node == NULL || (index >> (root->shift * root->height)))
++		return -ENOENT;
++
++	height = root->height;
++	shift = (height - 1) * root->shift;
++
++	do {
++		offset = (index >> shift) & root->mask;
++		node = node->stores[offset];
++		if (!node)
++			return -ENOENT;
++
++		shift -= root->shift;
++	} while (shift > 0);
++
++	offset = index & root->mask;
++	if (!node->stores[offset])
++		return -ENOENT;
++
++	sradix_tree_delete_from_leaf(root, node, index);
++
++	return 0;
++}
+diff --git a/mm/Kconfig b/mm/Kconfig
+index f2104cc0d35c..6787eab6fea8 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -321,6 +321,32 @@ config KSM
+ 	  See Documentation/vm/ksm.rst for more information: KSM is inactive
+ 	  until a program has madvised that an area is MADV_MERGEABLE, and
+ 	  root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
++choice
++	prompt "Choose UKSM/KSM strategy"
++	default UKSM
++	depends on KSM
++	help
++	  This option allows selecting a UKSM/KSM strategy.
++
++config UKSM
++	bool "Ultra-KSM for page merging"
++	depends on KSM
++	help
++	UKSM is inspired by the Linux kernel's KSM (Kernel Samepage
++	Merging), but with a fundamentally rewritten core algorithm. With
++	this advanced algorithm, UKSM can transparently scan all anonymously
++	mapped user-space memory with significantly improved scan speed
++	and CPU efficiency. Since KVM is friendly to KSM, KVM can also
++	benefit from UKSM. UKSM has had its first stable release and its
++	first real-world enterprise user. For more information, see its
++	project page (github.com/dolohow/uksm).
++
++config KSM_LEGACY
++	bool "Legacy KSM implementation"
++	depends on KSM
++	help
++	The legacy KSM implementation from Red Hat.
++endchoice
+ 
+ config DEFAULT_MMAP_MIN_ADDR
+ 	int "Low address space to protect from user allocation"
+diff --git a/mm/Makefile b/mm/Makefile
+index 6e9d46b2efc9..fae71acbe7f0 100644
+--- a/mm/Makefile
++++ b/mm/Makefile
+@@ -76,7 +76,8 @@ obj-$(CONFIG_SPARSEMEM)	+= sparse.o
+ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
+ obj-$(CONFIG_SLOB) += slob.o
+ obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
+-obj-$(CONFIG_KSM) += ksm.o
++obj-$(CONFIG_KSM_LEGACY) += ksm.o
++obj-$(CONFIG_UKSM) += uksm.o
+ obj-$(CONFIG_PAGE_POISONING) += page_poison.o
+ obj-$(CONFIG_SLAB) += slab.o
+ obj-$(CONFIG_SLUB) += slub.o
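The Kconfig choice and the Makefile change above decide whether the legacy mm/ksm.o or the new mm/uksm.o gets built. A hypothetical usage sketch; the sysfs location follows the upstream UKSM project and does not appear in this excerpt:

    # select UKSM instead of the legacy implementation and rebuild
    scripts/config -e KSM -e UKSM -d KSM_LEGACY
    make olddefconfig && make -j"$(nproc)"
    # after booting the new kernel: this patch adds a KsmZeroPages counter to meminfo
    grep KsmZeroPages /proc/meminfo
    # runtime tunables (assumed path, per the UKSM project documentation)
    ls /sys/kernel/mm/uksm/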
+diff --git a/mm/ksm.c b/mm/ksm.c
+index 4102034cd55a..13ea01100de2 100644
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -857,17 +857,6 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
+ 	return err;
+ }
+ 
+-static inline struct stable_node *page_stable_node(struct page *page)
+-{
+-	return PageKsm(page) ? page_rmapping(page) : NULL;
+-}
+-
+-static inline void set_page_stable_node(struct page *page,
+-					struct stable_node *stable_node)
+-{
+-	page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
+-}
+-
+ #ifdef CONFIG_SYSFS
+ /*
+  * Only called through the sysfs control interface:
+diff --git a/mm/memory.c b/mm/memory.c
+index 3ecad55103ad..fe4cb386639f 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -143,6 +143,25 @@ EXPORT_SYMBOL(zero_pfn);
+ 
+ unsigned long highest_memmap_pfn __read_mostly;
+ 
++#ifdef CONFIG_UKSM
++unsigned long uksm_zero_pfn __read_mostly;
++EXPORT_SYMBOL_GPL(uksm_zero_pfn);
++struct page *empty_uksm_zero_page;
++
++static int __init setup_uksm_zero_page(void)
++{
++	empty_uksm_zero_page = alloc_pages(__GFP_ZERO & ~__GFP_MOVABLE, 0);
++	if (!empty_uksm_zero_page)
++		panic("Oh boy, that early out of memory?");
++
++	SetPageReserved(empty_uksm_zero_page);
++	uksm_zero_pfn = page_to_pfn(empty_uksm_zero_page);
++
++	return 0;
++}
++core_initcall(setup_uksm_zero_page);
++#endif
++
+ /*
+  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
+  */
+@@ -158,6 +177,7 @@ void mm_trace_rss_stat(struct mm_struct *mm, int member, long count)
+ 	trace_rss_stat(mm, member, count);
+ }
+ 
++
+ #if defined(SPLIT_RSS_COUNTING)
+ 
+ void sync_mm_rss(struct mm_struct *mm)
+@@ -801,6 +821,11 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ 		get_page(page);
+ 		page_dup_rmap(page, false);
+ 		rss[mm_counter(page)]++;
++
++		/* Should return NULL in vm_normal_page() */
++		uksm_bugon_zeropage(pte);
++	} else {
++		uksm_map_zero_page(pte);
+ 	}
+ 
+ out_set_pte:
+@@ -1073,8 +1098,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
+ 			ptent = ptep_get_and_clear_full(mm, addr, pte,
+ 							tlb->fullmm);
+ 			tlb_remove_tlb_entry(tlb, pte, addr);
+-			if (unlikely(!page))
++			if (unlikely(!page)) {
++				uksm_unmap_zero_page(ptent);
+ 				continue;
++			}
+ 
+ 			if (!PageAnon(page)) {
+ 				if (pte_dirty(ptent)) {
+@@ -2410,6 +2437,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
+ 
+ 	if (likely(src)) {
+ 		copy_user_highpage(dst, src, addr, vma);
++		uksm_cow_page(vma, src);
+ 		return true;
+ 	}
+ 
+@@ -2655,6 +2683,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
+ 							      vmf->address);
+ 		if (!new_page)
+ 			goto oom;
++		uksm_cow_pte(vma, vmf->orig_pte);
+ 	} else {
+ 		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
+ 				vmf->address);
+@@ -2697,7 +2726,9 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
+ 						mm_counter_file(old_page));
+ 				inc_mm_counter_fast(mm, MM_ANONPAGES);
+ 			}
++			uksm_bugon_zeropage(vmf->orig_pte);
+ 		} else {
++			uksm_unmap_zero_page(vmf->orig_pte);
+ 			inc_mm_counter_fast(mm, MM_ANONPAGES);
+ 		}
+ 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 8c7ca737a19b..a924cb128844 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -46,6 +46,7 @@
+ #include <linux/moduleparam.h>
+ #include <linux/pkeys.h>
+ #include <linux/oom.h>
++#include <linux/ksm.h>
+ #include <linux/sched/mm.h>
+ 
+ #include <linux/uaccess.h>
+@@ -181,6 +182,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+ 	if (vma->vm_file)
+ 		fput(vma->vm_file);
+ 	mpol_put(vma_policy(vma));
++       uksm_remove_vma(vma);
+ 	vm_area_free(vma);
+ 	return next;
+ }
+@@ -708,9 +710,16 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ 	long adjust_next = 0;
+ 	int remove_next = 0;
+ 
++/*
++ * to avoid deadlock, ksm_remove_vma must be done before any spin_lock is
++ * acquired
++ */
++	uksm_remove_vma(vma);
++
+ 	if (next && !insert) {
+ 		struct vm_area_struct *exporter = NULL, *importer = NULL;
+ 
++		uksm_remove_vma(next);
+ 		if (end >= next->vm_end) {
+ 			/*
+ 			 * vma expands, overlapping all the next, and
+@@ -841,6 +850,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ 		end_changed = true;
+ 	}
+ 	vma->vm_pgoff = pgoff;
++
+ 	if (adjust_next) {
+ 		next->vm_start += adjust_next << PAGE_SHIFT;
+ 		next->vm_pgoff += adjust_next;
+@@ -946,6 +956,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ 		if (remove_next == 2) {
+ 			remove_next = 1;
+ 			end = next->vm_end;
++			uksm_remove_vma(next);
+ 			goto again;
+ 		}
+ 		else if (next)
+@@ -972,10 +983,14 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ 			 */
+ 			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
+ 		}
++	} else {
++		if (next && !insert)
++			uksm_vma_add_new(next);
+ 	}
+ 	if (insert && file)
+ 		uprobe_mmap(insert);
+ 
++	uksm_vma_add_new(vma);
+ 	validate_mm(mm);
+ 
+ 	return 0;
+@@ -1434,6 +1449,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
+ 	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+ 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+ 
++	/* If uksm is enabled, we add VM_MERGEABLE to new VMAs. */
++	uksm_vm_flags_mod(&vm_flags);
++
+ 	if (flags & MAP_LOCKED)
+ 		if (!can_do_mlock())
+ 			return -EPERM;
+@@ -1801,6 +1819,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
+ 			allow_write_access(file);
+ 	}
+ 	file = vma->vm_file;
++	uksm_vma_add_new(vma);
+ out:
+ 	perf_event_mmap(vma);
+ 
+@@ -1843,6 +1862,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
+ 	if (vm_flags & VM_DENYWRITE)
+ 		allow_write_access(file);
+ free_vma:
++	uksm_remove_vma(vma);
+ 	vm_area_free(vma);
+ unacct_error:
+ 	if (charged)
+@@ -2705,6 +2725,8 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+ 	else
+ 		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+ 
++	uksm_vma_add_new(new);
++
+ 	/* Success. */
+ 	if (!err)
+ 		return 0;
+@@ -3012,6 +3034,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
+ 	if ((flags & (~VM_EXEC)) != 0)
+ 		return -EINVAL;
+ 	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
++	uksm_vm_flags_mod(&flags);
+ 
+ 	mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+ 	if (IS_ERR_VALUE(mapped_addr))
+@@ -3062,6 +3085,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
+ 	vma->vm_flags = flags;
+ 	vma->vm_page_prot = vm_get_page_prot(flags);
+ 	vma_link(mm, vma, prev, rb_link, rb_parent);
++	uksm_vma_add_new(vma);
+ out:
+ 	perf_event_mmap(vma);
+ 	mm->total_vm += len >> PAGE_SHIFT;
+@@ -3139,6 +3163,12 @@ void exit_mmap(struct mm_struct *mm)
+ 		mmap_write_unlock(mm);
+ 	}
+ 
++	/*
++	 * Taking write lock on mmap does not harm others,
++	 * but it's crucial for uksm to avoid races.
++	 */
++	mmap_write_lock(mm);
++
+ 	if (mm->locked_vm) {
+ 		vma = mm->mmap;
+ 		while (vma) {
+@@ -3173,6 +3203,11 @@ void exit_mmap(struct mm_struct *mm)
+ 		vma = remove_vma(vma);
+ 	}
+ 	vm_unacct_memory(nr_accounted);
++
++	mm->mmap = NULL;
++	mm->mm_rb = RB_ROOT;
++	vmacache_invalidate(mm);
++	mmap_write_unlock(mm);
+ }
+ 
+ /* Insert vm structure into process list sorted by address
+@@ -3280,6 +3315,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
+ 			new_vma->vm_ops->open(new_vma);
+ 		vma_link(mm, new_vma, prev, rb_link, rb_parent);
+ 		*need_rmap_locks = false;
++		uksm_vma_add_new(new_vma);
+ 	}
+ 	return new_vma;
+ 
+@@ -3432,6 +3468,7 @@ static struct vm_area_struct *__install_special_mapping(
+ 	vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
+ 
+ 	perf_event_mmap(vma);
++	uksm_vma_add_new(vma);
+ 
+ 	return vma;
+ 
+diff --git a/mm/uksm.c b/mm/uksm.c
+new file mode 100644
+index 000000000000..c762d1c1d8a7
+--- /dev/null
++++ b/mm/uksm.c
+@@ -0,0 +1,5613 @@
++/*
++ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
++ *
++ * This is an improvement upon KSM. Some basic data structures and routines
++ * are borrowed from ksm.c .
++ *
++ * Its new features:
++ * 1. Full system scan:
++ *      It automatically scans all user processes' anonymous VMAs. Kernel-user
++ *      interaction to submit a memory area to KSM is no longer needed.
++ *
++ * 2. Rich area detection:
++ *      It automatically detects rich areas containing abundant duplicated
++ *      pages. Rich areas are given a full scan speed. Poor areas are
++ *      sampled at a reasonable speed with very low CPU consumption.
++ *
++ * 3. Ultra Per-page scan speed improvement:
++ *      A new hash algorithm is proposed. As a result, on a machine with
++ *      Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it
++ *      can scan memory areas that do not contain duplicated pages at a speed of
++ *      627MB/sec ~ 2445MB/sec and can merge duplicated areas at a speed of
++ *      477MB/sec ~ 923MB/sec.
++ *
++ * 4. Thrashing area avoidance:
++ *      A thrashing area (a VMA that has frequent KSM page break-outs) can be
++ *      filtered out. My benchmark shows it's more efficient than KSM's per-page
++ *      hash value based volatile page detection.
++ *
++ *
++ * 5. Misc changes upon KSM:
++ *      * It has a fully x86-optimized memcmp dedicated for 4-byte-aligned page
++ *        comparison. It's much faster than the default C version on x86.
++ *      * rmap_item now has a struct page *page member to loosely cache an
++ *        address-->page mapping, which avoids many time-costly calls to
++ *        follow_page().
++ *      * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
++ *      * try_to_merge_two_pages() now can revert a pte if it fails. No break_
++ *        ksm is needed for this case.
++ *
++ * 6. Full Zero Page consideration (contributed by Figo Zhang)
++ *    Now uksmd considers full zero pages as special pages and merges them
++ *    into a special unswappable uksm zero page.
++ */
++
++#include <linux/errno.h>
++#include <linux/mm.h>
++#include <linux/fs.h>
++#include <linux/mman.h>
++#include <linux/sched.h>
++#include <linux/sched/mm.h>
++#include <linux/sched/coredump.h>
++#include <linux/sched/cputime.h>
++#include <linux/rwsem.h>
++#include <linux/pagemap.h>
++#include <linux/rmap.h>
++#include <linux/spinlock.h>
++#include <linux/jhash.h>
++#include <linux/delay.h>
++#include <linux/kthread.h>
++#include <linux/wait.h>
++#include <linux/slab.h>
++#include <linux/rbtree.h>
++#include <linux/memory.h>
++#include <linux/mmu_notifier.h>
++#include <linux/swap.h>
++#include <linux/ksm.h>
++#include <linux/crypto.h>
++#include <linux/scatterlist.h>
++#include <crypto/hash.h>
++#include <linux/random.h>
++#include <linux/math64.h>
++#include <linux/gcd.h>
++#include <linux/freezer.h>
++#include <linux/oom.h>
++#include <linux/numa.h>
++#include <linux/sradix-tree.h>
++
++#include <asm/tlbflush.h>
++#include "internal.h"
++
++#ifdef CONFIG_X86
++#undef memcmp
++
++#ifdef CONFIG_X86_32
++#define memcmp memcmpx86_32
++/*
++ * Compare 4-byte-aligned addresses s1 and s2, with length n
++ */
++int memcmpx86_32(void *s1, void *s2, size_t n)
++{
++	size_t num = n / 4;
++	register int res;
++
++	__asm__ __volatile__
++	(
++	 "testl %3,%3\n\t"
++	 "repe; cmpsd\n\t"
++	 "je        1f\n\t"
++	 "sbbl      %0,%0\n\t"
++	 "orl       $1,%0\n"
++	 "1:"
++	 : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num)
++	 : "0" (0)
++	 : "cc");
++
++	return res;
++}
++
++/*
++ * Check whether the page is all zeros.
++ */
++static int is_full_zero(const void *s1, size_t len)
++{
++	unsigned char same;
++
++	len /= 4;
++
++	__asm__ __volatile__
++	("repe; scasl;"
++	 "sete %0"
++	 : "=qm" (same), "+D" (s1), "+c" (len)
++	 : "a" (0)
++	 : "cc");
++
++	return same;
++}
++
++
++#elif defined(CONFIG_X86_64)
++#define memcmp memcmpx86_64
++/*
++ * Compare 8-byte-aligned addresses s1 and s2, with length n
++ */
++int memcmpx86_64(void *s1, void *s2, size_t n)
++{
++	size_t num = n / 8;
++	register int res;
++
++	__asm__ __volatile__
++	(
++	 "testq %q3,%q3\n\t"
++	 "repe; cmpsq\n\t"
++	 "je        1f\n\t"
++	 "sbbq      %q0,%q0\n\t"
++	 "orq       $1,%q0\n"
++	 "1:"
++	 : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num)
++	 : "0" (0)
++	 : "cc");
++
++	return res;
++}
++
++static int is_full_zero(const void *s1, size_t len)
++{
++	unsigned char same;
++
++	len /= 8;
++
++	__asm__ __volatile__
++	("repe; scasq;"
++	 "sete %0"
++	 : "=qm" (same), "+D" (s1), "+c" (len)
++	 : "a" (0)
++	 : "cc");
++
++	return same;
++}
++
++#endif
++#else
++static int is_full_zero(const void *s1, size_t len)
++{
++	const unsigned long *src = s1;
++	int i;
++
++	len /= sizeof(*src);
++
++	for (i = 0; i < len; i++) {
++		if (src[i])
++			return 0;
++	}
++
++	return 1;
++}
++#endif
++
++#define UKSM_RUNG_ROUND_FINISHED  (1 << 0)
++#define TIME_RATIO_SCALE	10000
++
++#define SLOT_TREE_NODE_SHIFT	8
++#define SLOT_TREE_NODE_STORE_SIZE	(1UL << SLOT_TREE_NODE_SHIFT)
++struct slot_tree_node {
++	unsigned long size;
++	struct sradix_tree_node snode;
++	void *stores[SLOT_TREE_NODE_STORE_SIZE];
++};
++
++static struct kmem_cache *slot_tree_node_cachep;
++
++static struct sradix_tree_node *slot_tree_node_alloc(void)
++{
++	struct slot_tree_node *p;
++
++	p = kmem_cache_zalloc(slot_tree_node_cachep, GFP_KERNEL |
++			      __GFP_NORETRY | __GFP_NOWARN);
++	if (!p)
++		return NULL;
++
++	return &p->snode;
++}
++
++static void slot_tree_node_free(struct sradix_tree_node *node)
++{
++	struct slot_tree_node *p;
++
++	p = container_of(node, struct slot_tree_node, snode);
++	kmem_cache_free(slot_tree_node_cachep, p);
++}
++
++static void slot_tree_node_extend(struct sradix_tree_node *parent,
++				  struct sradix_tree_node *child)
++{
++	struct slot_tree_node *p, *c;
++
++	p = container_of(parent, struct slot_tree_node, snode);
++	c = container_of(child, struct slot_tree_node, snode);
++
++	p->size += c->size;
++}
++
++void slot_tree_node_assign(struct sradix_tree_node *node,
++			   unsigned int index, void *item)
++{
++	struct vma_slot *slot = item;
++	struct slot_tree_node *cur;
++
++	slot->snode = node;
++	slot->sindex = index;
++
++	while (node) {
++		cur = container_of(node, struct slot_tree_node, snode);
++		cur->size += slot->pages;
++		node = node->parent;
++	}
++}
++
++void slot_tree_node_rm(struct sradix_tree_node *node, unsigned int offset)
++{
++	struct vma_slot *slot;
++	struct slot_tree_node *cur;
++	unsigned long pages;
++
++	if (node->height == 1) {
++		slot = node->stores[offset];
++		pages = slot->pages;
++	} else {
++		cur = container_of(node->stores[offset],
++				   struct slot_tree_node, snode);
++		pages = cur->size;
++	}
++
++	while (node) {
++		cur = container_of(node, struct slot_tree_node, snode);
++		cur->size -= pages;
++		node = node->parent;
++	}
++}
++
++unsigned long slot_iter_index;
++int slot_iter(void *item,  unsigned long height)
++{
++	struct slot_tree_node *node;
++	struct vma_slot *slot;
++
++	if (height == 1) {
++		slot = item;
++		if (slot_iter_index < slot->pages) {
++			/*in this one*/
++			return 1;
++		} else {
++			slot_iter_index -= slot->pages;
++			return 0;
++		}
++
++	} else {
++		node = container_of(item, struct slot_tree_node, snode);
++		if (slot_iter_index < node->size) {
++			/*in this one*/
++			return 1;
++		} else {
++			slot_iter_index -= node->size;
++			return 0;
++		}
++	}
++}
++
++
++static inline void slot_tree_init_root(struct sradix_tree_root *root)
++{
++	init_sradix_tree_root(root, SLOT_TREE_NODE_SHIFT);
++	root->alloc = slot_tree_node_alloc;
++	root->free = slot_tree_node_free;
++	root->extend = slot_tree_node_extend;
++	root->assign = slot_tree_node_assign;
++	root->rm = slot_tree_node_rm;
++}
++
++void slot_tree_init(void)
++{
++	slot_tree_node_cachep = kmem_cache_create("slot_tree_node",
++				sizeof(struct slot_tree_node), 0,
++				SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
++				NULL);
++}
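The slot tree bookkeeping above (slot_tree_node_assign()/slot_tree_node_rm() keeping a per-subtree page count, slot_iter() descending by page index) is what lets the scanner treat a whole rung as one flat array of pages: picking "the slot that owns the Nth page" becomes a single root-to-leaf descent. A minimal userspace sketch of that descent over a single level; toy_node, toy_slot and find_nth_page() are illustrative stand-ins, not the sradix-tree API:

#include <stdio.h>

/* Illustrative stand-ins for vma_slot and slot_tree_node. */
struct toy_slot { unsigned long pages; const char *name; };

struct toy_node {
	unsigned long size;		/* cached total of pages below */
	int nr;				/* children in use */
	struct toy_slot *slots[8];	/* one level of leaves, for brevity */
};

/* Descend like slot_iter(): subtract child sizes until the index fits. */
static struct toy_slot *find_nth_page(struct toy_node *root, unsigned long *index)
{
	int i;

	if (*index >= root->size)
		return NULL;		/* out of range for this subtree */

	for (i = 0; i < root->nr; i++) {
		struct toy_slot *s = root->slots[i];

		if (*index < s->pages)
			return s;	/* the Nth page lives in this slot */
		*index -= s->pages;	/* skip this slot and keep going */
	}
	return NULL;
}

int main(void)
{
	struct toy_slot a = { 3, "A" }, b = { 5, "B" }, c = { 2, "C" };
	struct toy_node root = { .size = 10, .nr = 3, .slots = { &a, &b, &c } };
	unsigned long idx = 6;		/* global page index within the rung */
	struct toy_slot *s = find_nth_page(&root, &idx);

	printf("page 6 -> slot %s, offset %lu\n", s->name, idx);	/* B, 3 */
	return 0;
}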
++
++
++/* Each rung of this ladder is a list of VMAs having a same scan ratio */
++struct scan_rung {
++	//struct list_head scanned_list;
++	struct sradix_tree_root vma_root;
++	struct sradix_tree_root vma_root2;
++
++	struct vma_slot *current_scan;
++	unsigned long current_offset;
++
++	/*
++	 * The initial value for current_offset; it should loop over
++	 * [0 ~ step - 1] to let every slot have a chance to be scanned.
++	 */
++	unsigned long offset_init;
++	unsigned long step; /* dynamic step for current_offset */
++	unsigned int flags;
++	unsigned long pages_to_scan;
++	//unsigned long fully_scanned_slots;
++	/*
++	 * A little bit tricky - if cpu_ratio > 0, then the value is the
++	 * cpu time ratio this rung can spend in every scan
++	 * period. If < 0, then it is the cpu time ratio relative to the
++	 * max cpu percentage the user specified. Both are in units of
++	 * 1/TIME_RATIO_SCALE.
++	 */
++	int cpu_ratio;
++
++	/*
++	 * How long will it take for all slots in this rung to be fully
++	 * scanned? If it's zero, we don't care about the cover time:
++	 * it's fully scanned.
++	 */
++	unsigned int cover_msecs;
++	//unsigned long vma_num;
++	//unsigned long pages; /* Sum of all slot's pages in rung */
++};
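The offset_init/step pair is how a rung samples rather than fully scans its slots: one pass visits only every step-th page starting at the current offset, and the offset walks through [0, step-1] over successive rounds so no page is skipped forever. A rough userspace sketch of that schedule; sample_slot() and scan_one_page() are placeholders, not functions from this patch:

#include <stdio.h>

/* Placeholder for the real per-page work (hashing, tree lookups, ...). */
static void scan_one_page(unsigned long page_index)
{
	printf(" %lu", page_index);
}

/* Visit every step-th page of a slot, starting at the rung's offset. */
static void sample_slot(unsigned long pages, unsigned long offset,
			unsigned long step)
{
	unsigned long i;

	for (i = offset; i < pages; i += step)
		scan_one_page(i);
}

int main(void)
{
	unsigned long pages = 10, step = 4, round;

	/* The offset loops over [0 ~ step - 1], so 4 rounds cover all 10 pages. */
	for (round = 0; round < step; round++) {
		printf("round %lu:", round);
		sample_slot(pages, round, step);
		printf("\n");
	}
	return 0;
}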
++
++/**
++ * node of either the stable or unstable rbtree
++ *
++ */
++struct tree_node {
++	struct rb_node node; /* link in the main (un)stable rbtree */
++	struct rb_root sub_root; /* rb_root for sublevel collision rbtree */
++	u32 hash;
++	unsigned long count; /* TODO: merged with sub_root */
++	struct list_head all_list; /* all tree nodes in stable/unstable tree */
++};
++
++/**
++ * struct stable_node - node of the stable rbtree
++ * @node: rb node of this ksm page in the stable tree
++ * @hlist: hlist head of rmap_items using this ksm page
++ * @kpfn: page frame number of this ksm page
++ */
++struct stable_node {
++	struct rb_node node; /* link in sub-rbtree */
++	struct tree_node *tree_node; /* its tree_node root in the stable tree, NULL if it's in the hell list */
++	struct hlist_head hlist;
++	unsigned long kpfn;
++	u32 hash_max; /* if ==0 then it's not been calculated yet */
++	struct list_head all_list; /* in a list for all stable nodes */
++};
++
++/**
++ * struct node_vma - group rmap_items linked in a same stable
++ * node together.
++ */
++struct node_vma {
++	union {
++		struct vma_slot *slot;
++		unsigned long key;  /* slot is used as key sorted on hlist */
++	};
++	struct hlist_node hlist;
++	struct hlist_head rmap_hlist;
++	struct stable_node *head;
++};
++
++/**
++ * struct rmap_item - reverse mapping item for virtual addresses
++ * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
++ * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
++ * @mm: the memory structure this rmap_item is pointing into
++ * @address: the virtual address this rmap_item tracks (+ flags in low bits)
++ * @node: rb node of this rmap_item in the unstable tree
++ * @head: pointer to stable_node heading this list in the stable tree
++ * @hlist: link into hlist of rmap_items hanging off that stable_node
++ */
++struct rmap_item {
++	struct vma_slot *slot;
++	struct page *page;
++	unsigned long address;	/* + low bits used for flags below */
++	unsigned long hash_round;
++	unsigned long entry_index;
++	union {
++		struct {/* when in unstable tree */
++			struct rb_node node;
++			struct tree_node *tree_node;
++			u32 hash_max;
++		};
++		struct { /* when in stable tree */
++			struct node_vma *head;
++			struct hlist_node hlist;
++			struct anon_vma *anon_vma;
++		};
++	};
++} __aligned(4);
++
++struct rmap_list_entry {
++	union {
++		struct rmap_item *item;
++		unsigned long addr;
++	};
++	/* lowest bit is used for is_addr tag */
++} __aligned(4); /* 4-byte aligned to fit into pages */
++
++
++/* Basic data structure definition ends */
++
++
++/*
++ * Flags for rmap_item to judge if it's listed in the stable/unstable tree.
++ * The flags use the low bits of rmap_item.address
++ */
++#define UNSTABLE_FLAG	0x1
++#define STABLE_FLAG	0x2
++#define get_rmap_addr(x)	((x)->address & PAGE_MASK)
++
++/*
++ * rmap_list_entry helpers
++ */
++#define IS_ADDR_FLAG	1
++#define is_addr(ptr)		((unsigned long)(ptr) & IS_ADDR_FLAG)
++#define set_is_addr(ptr)	((ptr) |= IS_ADDR_FLAG)
++#define get_clean_addr(ptr)	(((ptr) & ~(__typeof__(ptr))IS_ADDR_FLAG))
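
Both rmap_item and rmap_list_entry are forced to 4-byte alignment, so the low bit of a stored pointer is always zero and the macros above can reuse it as the "this entry holds a bare address" tag. A standalone illustration of the same tagging trick with toy types:

#include <stdio.h>
#include <stdint.h>

#define IS_ADDR_FLAG	1UL
#define is_addr(v)	((v) & IS_ADDR_FLAG)
#define set_is_addr(v)	((v) |= IS_ADDR_FLAG)
#define get_clean_addr(v) ((v) & ~IS_ADDR_FLAG)

struct item { int payload; } __attribute__((aligned(4)));

int main(void)
{
	struct item it = { 42 };
	uintptr_t entry;

	/* Store a real pointer: low bit is 0 thanks to the 4-byte alignment. */
	entry = (uintptr_t)&it;
	printf("pointer entry, tagged? %d, payload %d\n",
	       (int)is_addr(entry), ((struct item *)entry)->payload);

	/* Store a bare address instead and tag it. */
	entry = 0x1000;
	set_is_addr(entry);
	printf("address entry, tagged? %d, address 0x%lx\n",
	       (int)is_addr(entry), (unsigned long)get_clean_addr(entry));
	return 0;
}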
++
++
++/*
++ * High speed caches for frequently allocated and freed structs
++ */
++static struct kmem_cache *rmap_item_cache;
++static struct kmem_cache *stable_node_cache;
++static struct kmem_cache *node_vma_cache;
++static struct kmem_cache *vma_slot_cache;
++static struct kmem_cache *tree_node_cache;
++#define UKSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("uksm_"#__struct,\
++		sizeof(struct __struct), __alignof__(struct __struct),\
++		(__flags), NULL)
++
++/* Array of all scan_rung, uksm_scan_ladder[0] having the minimum scan ratio */
++#define SCAN_LADDER_SIZE 4
++static struct scan_rung uksm_scan_ladder[SCAN_LADDER_SIZE];
++
++/* The evaluation rounds uksmd has finished */
++static unsigned long long uksm_eval_round = 1;
++
++/*
++ * we add 1 to this var when we consider we should rebuild the whole
++ * unstable tree.
++ */
++static unsigned long uksm_hash_round = 1;
++
++/*
++ * How many times the whole memory is scanned.
++ */
++static unsigned long long fully_scanned_round = 1;
++
++/* The total number of virtual pages of all vma slots */
++static u64 uksm_pages_total;
++
++/* The number of pages that have been scanned since startup */
++static u64 uksm_pages_scanned;
++
++static u64 scanned_virtual_pages;
++
++/* The number of pages that have been scanned since the last encode_benefit call */
++static u64 uksm_pages_scanned_last;
++
++/* If the scanned number is too large, we encode it here */
++static u64 pages_scanned_stored;
++
++static unsigned long pages_scanned_base;
++
++/* The number of nodes in the stable tree */
++static unsigned long uksm_pages_shared;
++
++/* The number of page slots additionally sharing those nodes */
++static unsigned long uksm_pages_sharing;
++
++/* The number of nodes in the unstable tree */
++static unsigned long uksm_pages_unshared;
++
++/*
++ * Milliseconds ksmd should sleep between scans,
++ * >= 100ms to be consistent with
++ * scan_time_to_sleep_msec()
++ */
++static unsigned int uksm_sleep_jiffies;
++
++/* The real value for the uksmd next sleep */
++static unsigned int uksm_sleep_real;
++
++/* Saved value for user input uksm_sleep_jiffies when it's enlarged */
++static unsigned int uksm_sleep_saved;
++
++/* Max percentage of cpu utilization ksmd can take to scan in one batch */
++static unsigned int uksm_max_cpu_percentage;
++
++static int uksm_cpu_governor;
++
++static char *uksm_cpu_governor_str[4] = { "full", "medium", "low", "quiet" };
++
++struct uksm_cpu_preset_s {
++	int cpu_ratio[SCAN_LADDER_SIZE];
++	unsigned int cover_msecs[SCAN_LADDER_SIZE];
++	unsigned int max_cpu; /* percentage */
++};
++
++struct uksm_cpu_preset_s uksm_cpu_preset[4] = {
++	{ {20, 40, -2500, -10000}, {1000, 500, 200, 50}, 95},
++	{ {20, 30, -2500, -10000}, {1000, 500, 400, 100}, 50},
++	{ {10, 20, -5000, -10000}, {1500, 1000, 1000, 250}, 20},
++	{ {10, 20, 40, 75}, {2000, 1000, 1000, 1000}, 1},
++};
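
To read the table above together with the cpu_ratio comment in struct scan_rung: a positive entry is an absolute share of CPU time in units of 1/TIME_RATIO_SCALE (20 means 0.2%), while a negative entry is a fraction of max_cpu (-2500 with max_cpu = 95 means 25% of 95%). A small sketch of that conversion; effective_ratio() and its rounding are illustrative, derived from the comment rather than taken from the patch:

#include <stdio.h>

#define TIME_RATIO_SCALE	10000

/*
 * Convert a rung's cpu_ratio to an effective CPU share in units of
 * 1/TIME_RATIO_SCALE: positive values are absolute, negative ones are
 * relative to max_cpu (a percentage).
 */
static long effective_ratio(int cpu_ratio, unsigned int max_cpu_percent)
{
	if (cpu_ratio >= 0)
		return cpu_ratio;
	/* -cpu_ratio is a fraction (out of TIME_RATIO_SCALE) of max_cpu. */
	return (long)(-cpu_ratio) * max_cpu_percent * (TIME_RATIO_SCALE / 100)
	       / TIME_RATIO_SCALE;
}

int main(void)
{
	/* The "full" preset: {20, 40, -2500, -10000}, max_cpu = 95. */
	int ratios[4] = { 20, 40, -2500, -10000 };
	unsigned int max_cpu = 95;
	int i;

	for (i = 0; i < 4; i++)
		printf("rung %d: %ld / %d of the CPU\n",
		       i, effective_ratio(ratios[i], max_cpu), TIME_RATIO_SCALE);
	return 0;	/* rung 2 prints 2375, i.e. 25%% of 95%% */
}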
++
++/* The default value for uksm_ema_page_time if it's not initialized */
++#define UKSM_PAGE_TIME_DEFAULT	500
++
++/* Cost to scan one page, as an exponential moving average, in nsecs */
++static unsigned long uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
++
++/* The exponential moving average alpha weight, in percentage. */
++#define EMA_ALPHA	20
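
With EMA_ALPHA = 20, each new per-page timing sample moves uksm_ema_page_time by only 20%, which smooths out jitter in the measured scan cost. A minimal sketch of such an update; ema_update() is an illustrative helper, the patch's own update code is not part of this hunk:

#include <stdio.h>

#define EMA_ALPHA		20	/* percent */
#define UKSM_PAGE_TIME_DEFAULT	500	/* nsecs */

/* Exponential moving average: new = alpha*sample + (1 - alpha)*old. */
static unsigned long ema_update(unsigned long sample, unsigned long old)
{
	return (EMA_ALPHA * sample + (100 - EMA_ALPHA) * old) / 100;
}

int main(void)
{
	unsigned long ema = UKSM_PAGE_TIME_DEFAULT;
	unsigned long samples[] = { 800, 820, 790, 3000, 810 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		ema = ema_update(samples[i], ema);
		printf("sample %lu ns -> ema %lu ns\n", samples[i], ema);
	}
	/* The 3000 ns outlier shifts the average only modestly. */
	return 0;
}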
++
++/*
++ * The threshold used to filter out thrashing areas.
++ * If it == 0, filtering is disabled; otherwise it's the percentage upper bound
++ * of the thrashing ratio of all areas. Any area with a bigger thrashing ratio
++ * will be considered as having a zero duplication ratio.
++ */
++static unsigned int uksm_thrash_threshold = 50;
++
++/* How much dedup ratio is considered to be abundant */
++static unsigned int uksm_abundant_threshold = 10;
++
++/* All slots having merged pages in this eval round. */
++struct list_head vma_slot_dedup = LIST_HEAD_INIT(vma_slot_dedup);
++
++/* How many times the ksmd has slept since startup */
++static unsigned long long uksm_sleep_times;
++
++#define UKSM_RUN_STOP	0
++#define UKSM_RUN_MERGE	1
++static unsigned int uksm_run = 1;
++
++static DECLARE_WAIT_QUEUE_HEAD(uksm_thread_wait);
++static DEFINE_MUTEX(uksm_thread_mutex);
++
++/*
++ * List vma_slot_new is for newly created vma_slot waiting to be added by
++ * ksmd. If one cannot be added (e.g. because it's too small), it's moved to
++ * vma_slot_noadd. vma_slot_del is the list for vma_slot whose corresponding
++ * VMA has been removed/freed.
++ */
++struct list_head vma_slot_new = LIST_HEAD_INIT(vma_slot_new);
++struct list_head vma_slot_noadd = LIST_HEAD_INIT(vma_slot_noadd);
++struct list_head vma_slot_del = LIST_HEAD_INIT(vma_slot_del);
++static DEFINE_SPINLOCK(vma_slot_list_lock);
++
++/* The unstable tree heads */
++static struct rb_root root_unstable_tree = RB_ROOT;
++
++/*
++ * All tree_nodes are in a list to be freed at once when unstable tree is
++ * freed after each scan round.
++ */
++static struct list_head unstable_tree_node_list =
++				LIST_HEAD_INIT(unstable_tree_node_list);
++
++/* List contains all stable nodes */
++static struct list_head stable_node_list = LIST_HEAD_INIT(stable_node_list);
++
++/*
++ * When the hash strength is changed, the stable tree must be delta_hashed and
++ * re-structured. We use two sets of the structs below to speed up the
++ * re-structuring of the stable tree.
++ */
++static struct list_head
++stable_tree_node_list[2] = {LIST_HEAD_INIT(stable_tree_node_list[0]),
++			    LIST_HEAD_INIT(stable_tree_node_list[1])};
++
++static struct list_head *stable_tree_node_listp = &stable_tree_node_list[0];
++static struct rb_root root_stable_tree[2] = {RB_ROOT, RB_ROOT};
++static struct rb_root *root_stable_treep = &root_stable_tree[0];
++static unsigned long stable_tree_index;
++
++/* The hash strength needed to hash a full page */
++#define HASH_STRENGTH_FULL		(PAGE_SIZE / sizeof(u32))
++
++/* The hash strength needed for loop-back hashing */
++#define HASH_STRENGTH_MAX		(HASH_STRENGTH_FULL + 10)
++
++/* The random offsets in a page */
++static u32 *random_nums;
++
++/* The hash strength */
++static unsigned long hash_strength = HASH_STRENGTH_FULL >> 4;
++
++/* The delta value each time the hash strength increases or decreases */
++static unsigned long hash_strength_delta;
++#define HASH_STRENGTH_DELTA_MAX	5
++
++/* The time we have saved due to random_sample_hash */
++static u64 rshash_pos;
++
++/* The time we have wasted due to hash collision */
++static u64 rshash_neg;
++
++struct uksm_benefit {
++	u64 pos;
++	u64 neg;
++	u64 scanned;
++	unsigned long base;
++} benefit;
++
++/*
++ * The relative cost of memcmp, compared to 1 time unit of random sample
++ * hash; this value is measured when the uksm module is initialized.
++ */
++static unsigned long memcmp_cost;
++
++static unsigned long  rshash_neg_cont_zero;
++static unsigned long  rshash_cont_obscure;
++
++/* The possible states of hash strength adjustment heuristic */
++enum rshash_states {
++		RSHASH_STILL,
++		RSHASH_TRYUP,
++		RSHASH_TRYDOWN,
++		RSHASH_NEW,
++		RSHASH_PRE_STILL,
++};
++
++/* The possible direction we are about to adjust hash strength */
++enum rshash_direct {
++	GO_UP,
++	GO_DOWN,
++	OBSCURE,
++	STILL,
++};
++
++/* random sampling hash state machine */
++static struct {
++	enum rshash_states state;
++	enum rshash_direct pre_direct;
++	u8 below_count;
++	/* Keep a lookup window of size 5; if above_count/below_count > 3
++	 * in this window, we stop trying.
++	 */
++	u8 lookup_window_index;
++	u64 stable_benefit;
++	unsigned long turn_point_down;
++	unsigned long turn_benefit_down;
++	unsigned long turn_point_up;
++	unsigned long turn_benefit_up;
++	unsigned long stable_point;
++} rshash_state;
++
++/* zero page hash table, hash_strength [0 ~ HASH_STRENGTH_MAX] */
++static u32 *zero_hash_table;
++
++static inline struct node_vma *alloc_node_vma(void)
++{
++	struct node_vma *node_vma;
++
++	node_vma = kmem_cache_zalloc(node_vma_cache, GFP_KERNEL |
++				     __GFP_NORETRY | __GFP_NOWARN);
++	if (node_vma) {
++		INIT_HLIST_HEAD(&node_vma->rmap_hlist);
++		INIT_HLIST_NODE(&node_vma->hlist);
++	}
++	return node_vma;
++}
++
++static inline void free_node_vma(struct node_vma *node_vma)
++{
++	kmem_cache_free(node_vma_cache, node_vma);
++}
++
++
++static inline struct vma_slot *alloc_vma_slot(void)
++{
++	struct vma_slot *slot;
++
++	/*
++	 * In case ksm is not initialized by now.
++	 * Oops, we need to consider the call site of uksm_init() in the future.
++	 */
++	if (!vma_slot_cache)
++		return NULL;
++
++	slot = kmem_cache_zalloc(vma_slot_cache, GFP_KERNEL |
++				 __GFP_NORETRY | __GFP_NOWARN);
++	if (slot) {
++		INIT_LIST_HEAD(&slot->slot_list);
++		INIT_LIST_HEAD(&slot->dedup_list);
++		slot->flags |= UKSM_SLOT_NEED_RERAND;
++	}
++	return slot;
++}
++
++static inline void free_vma_slot(struct vma_slot *vma_slot)
++{
++	kmem_cache_free(vma_slot_cache, vma_slot);
++}
++
++
++
++static inline struct rmap_item *alloc_rmap_item(void)
++{
++	struct rmap_item *rmap_item;
++
++	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
++				      __GFP_NORETRY | __GFP_NOWARN);
++	if (rmap_item) {
++		/* BUG if the lowest bit is not clear; it is reserved for flag use */
++		BUG_ON(is_addr(rmap_item));
++	}
++	return rmap_item;
++}
++
++static inline void free_rmap_item(struct rmap_item *rmap_item)
++{
++	rmap_item->slot = NULL;	/* debug safety */
++	kmem_cache_free(rmap_item_cache, rmap_item);
++}
++
++static inline struct stable_node *alloc_stable_node(void)
++{
++	struct stable_node *node;
++
++	node = kmem_cache_alloc(stable_node_cache, GFP_KERNEL |
++				__GFP_NORETRY | __GFP_NOWARN);
++	if (!node)
++		return NULL;
++
++	INIT_HLIST_HEAD(&node->hlist);
++	list_add(&node->all_list, &stable_node_list);
++	return node;
++}
++
++static inline void free_stable_node(struct stable_node *stable_node)
++{
++	list_del(&stable_node->all_list);
++	kmem_cache_free(stable_node_cache, stable_node);
++}
++
++static inline struct tree_node *alloc_tree_node(struct list_head *list)
++{
++	struct tree_node *node;
++
++	node = kmem_cache_zalloc(tree_node_cache, GFP_KERNEL |
++				 __GFP_NORETRY | __GFP_NOWARN);
++	if (!node)
++		return NULL;
++
++	list_add(&node->all_list, list);
++	return node;
++}
++
++static inline void free_tree_node(struct tree_node *node)
++{
++	list_del(&node->all_list);
++	kmem_cache_free(tree_node_cache, node);
++}
++
++static void uksm_drop_anon_vma(struct rmap_item *rmap_item)
++{
++	struct anon_vma *anon_vma = rmap_item->anon_vma;
++
++	put_anon_vma(anon_vma);
++}
++
++
++/**
++ * Remove a stable node from stable_tree, may unlink from its tree_node and
++ * may remove its parent tree_node if no other stable node is pending.
++ *
++ * @stable_node		The node that needs to be removed
++ * @unlink_rb			Will this node be unlinked from the rbtree?
++ * @remove_tree_node	Will its tree_node be removed if empty?
++ */
++static void remove_node_from_stable_tree(struct stable_node *stable_node,
++					 int unlink_rb,  int remove_tree_node)
++{
++	struct node_vma *node_vma;
++	struct rmap_item *rmap_item;
++	struct hlist_node *n;
++
++	if (!hlist_empty(&stable_node->hlist)) {
++		hlist_for_each_entry_safe(node_vma, n,
++					  &stable_node->hlist, hlist) {
++			hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) {
++				uksm_pages_sharing--;
++
++				uksm_drop_anon_vma(rmap_item);
++				rmap_item->address &= PAGE_MASK;
++			}
++			free_node_vma(node_vma);
++			cond_resched();
++		}
++
++		/* the last one is counted as shared */
++		uksm_pages_shared--;
++		uksm_pages_sharing++;
++	}
++
++	if (stable_node->tree_node && unlink_rb) {
++		rb_erase(&stable_node->node,
++			 &stable_node->tree_node->sub_root);
++
++		if (RB_EMPTY_ROOT(&stable_node->tree_node->sub_root) &&
++		    remove_tree_node) {
++			rb_erase(&stable_node->tree_node->node,
++				 root_stable_treep);
++			free_tree_node(stable_node->tree_node);
++		} else {
++			stable_node->tree_node->count--;
++		}
++	}
++
++	free_stable_node(stable_node);
++}
++
++
++/*
++ * get_uksm_page: checks if the page indicated by the stable node
++ * is still its ksm page, despite having held no reference to it.
++ * In which case we can trust the content of the page, and it
++ * returns the gotten page; but if the page has now been zapped,
++ * remove the stale node from the stable tree and return NULL.
++ *
++ * You would expect the stable_node to hold a reference to the ksm page.
++ * But if it increments the page's count, swapping out has to wait for
++ * ksmd to come around again before it can free the page, which may take
++ * seconds or even minutes: much too unresponsive.  So instead we use a
++ * "keyhole reference": access to the ksm page from the stable node peeps
++ * out through its keyhole to see if that page still holds the right key,
++ * pointing back to this stable node.  This relies on freeing a PageAnon
++ * page to reset its page->mapping to NULL, and relies on no other use of
++ * a page to put something that might look like our key in page->mapping.
++ *
++ * include/linux/pagemap.h page_cache_get_speculative() is a good reference,
++ * but this is different - made simpler by uksm_thread_mutex being held, but
++ * interesting for assuming that no other use of the struct page could ever
++ * put our expected_mapping into page->mapping (or a field of the union which
++ * coincides with page->mapping).  The RCU calls are not for KSM at all, but
++ * to keep the page_count protocol described with page_cache_get_speculative.
++ *
++ * Note: it is possible that get_uksm_page() will return NULL one moment,
++ * then page the next, if the page is in between page_freeze_refs() and
++ * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
++ * is on its way to being freed; but it is an anomaly to bear in mind.
++ *
++ * @unlink_rb:		if the removal of this node should first unlink it
++ * from its rbtree. stable_node_reinsert will prevent this when restructuring
++ * the node from its old tree.
++ *
++ * @remove_tree_node:	if this is the last one of its tree_node, will the
++ * tree_node be freed? If we are inserting a stable node, this tree_node may
++ * be reused, so don't free it.
++ */
++static struct page *get_uksm_page(struct stable_node *stable_node,
++				 int unlink_rb, int remove_tree_node)
++{
++	struct page *page;
++	void *expected_mapping;
++	unsigned long kpfn;
++
++	expected_mapping = (void *)((unsigned long)stable_node |
++				    PAGE_MAPPING_KSM);
++again:
++	kpfn = READ_ONCE(stable_node->kpfn);
++	page = pfn_to_page(kpfn);
++
++	/*
++	 * page is computed from kpfn, so on most architectures reading
++	 * page->mapping is naturally ordered after reading node->kpfn,
++	 * but on Alpha we need to be more careful.
++	 */
++	smp_read_barrier_depends();
++
++	if (READ_ONCE(page->mapping) != expected_mapping)
++		goto stale;
++
++	/*
++	 * We cannot do anything with the page while its refcount is 0.
++	 * Usually 0 means free, or tail of a higher-order page: in which
++	 * case this node is no longer referenced, and should be freed;
++	 * however, it might mean that the page is under page_freeze_refs().
++	 * The __remove_mapping() case is easy, again the node is now stale;
++	 * but if page is swapcache in migrate_page_move_mapping(), it might
++	 * still be our page, in which case it's essential to keep the node.
++	 */
++	while (!get_page_unless_zero(page)) {
++		/*
++		 * Another check for page->mapping != expected_mapping would
++		 * work here too.  We have chosen the !PageSwapCache test to
++		 * optimize the common case, when the page is or is about to
++		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
++		 * in the freeze_refs section of __remove_mapping(); but Anon
++		 * page->mapping reset to NULL later, in free_pages_prepare().
++		 */
++		if (!PageSwapCache(page))
++			goto stale;
++		cpu_relax();
++	}
++
++	if (READ_ONCE(page->mapping) != expected_mapping) {
++		put_page(page);
++		goto stale;
++	}
++
++	lock_page(page);
++	if (READ_ONCE(page->mapping) != expected_mapping) {
++		unlock_page(page);
++		put_page(page);
++		goto stale;
++	}
++	unlock_page(page);
++	return page;
++stale:
++	/*
++	 * We come here from above when page->mapping or !PageSwapCache
++	 * suggests that the node is stale; but it might be under migration.
++	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
++	 * before checking whether node->kpfn has been changed.
++	 */
++	smp_rmb();
++	if (stable_node->kpfn != kpfn)
++		goto again;
++
++	remove_node_from_stable_tree(stable_node, unlink_rb, remove_tree_node);
++
++	return NULL;
++}
++
++/*
++ * Removing rmap_item from stable or unstable tree.
++ * This function will clean the information from the stable/unstable tree.
++ */
++static inline void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
++{
++	if (rmap_item->address & STABLE_FLAG) {
++		struct stable_node *stable_node;
++		struct node_vma *node_vma;
++		struct page *page;
++
++		node_vma = rmap_item->head;
++		stable_node = node_vma->head;
++		page = get_uksm_page(stable_node, 1, 1);
++		if (!page)
++			goto out;
++
++		/*
++		 * page lock is needed because it's racing with
++		 * try_to_unmap_ksm(), etc.
++		 */
++		lock_page(page);
++		hlist_del(&rmap_item->hlist);
++
++		if (hlist_empty(&node_vma->rmap_hlist)) {
++			hlist_del(&node_vma->hlist);
++			free_node_vma(node_vma);
++		}
++		unlock_page(page);
++
++		put_page(page);
++		if (hlist_empty(&stable_node->hlist)) {
++			/* do NOT call remove_node_from_stable_tree() here,
++			 * it's possible for a forked rmap_item to not be in
++			 * the stable tree while the in-tree rmap_items have
++			 * been deleted.
++			 */
++			uksm_pages_shared--;
++		} else
++			uksm_pages_sharing--;
++
++
++		uksm_drop_anon_vma(rmap_item);
++	} else if (rmap_item->address & UNSTABLE_FLAG) {
++		if (rmap_item->hash_round == uksm_hash_round) {
++
++			rb_erase(&rmap_item->node,
++				 &rmap_item->tree_node->sub_root);
++			if (RB_EMPTY_ROOT(&rmap_item->tree_node->sub_root)) {
++				rb_erase(&rmap_item->tree_node->node,
++					 &root_unstable_tree);
++
++				free_tree_node(rmap_item->tree_node);
++			} else
++				rmap_item->tree_node->count--;
++		}
++		uksm_pages_unshared--;
++	}
++
++	rmap_item->address &= PAGE_MASK;
++	rmap_item->hash_max = 0;
++
++out:
++	cond_resched();		/* we're called from many long loops */
++}
++
++static inline int slot_in_uksm(struct vma_slot *slot)
++{
++	return list_empty(&slot->slot_list);
++}
++
++/*
++ * Test if the mm is exiting
++ */
++static inline bool uksm_test_exit(struct mm_struct *mm)
++{
++	return atomic_read(&mm->mm_users) == 0;
++}
++
++static inline unsigned long vma_pool_size(struct vma_slot *slot)
++{
++	return round_up(sizeof(struct rmap_list_entry) * slot->pages,
++			PAGE_SIZE) >> PAGE_SHIFT;
++}
++
++#define CAN_OVERFLOW_U64(x, delta) (U64_MAX - (x) < (delta))
++
++/* must be done with sem locked */
++static int slot_pool_alloc(struct vma_slot *slot)
++{
++	unsigned long pool_size;
++
++	if (slot->rmap_list_pool)
++		return 0;
++
++	pool_size = vma_pool_size(slot);
++	slot->rmap_list_pool = kcalloc(pool_size, sizeof(struct page *),
++				       GFP_KERNEL);
++	if (!slot->rmap_list_pool)
++		return -ENOMEM;
++
++	slot->pool_counts = kcalloc(pool_size, sizeof(unsigned int),
++				    GFP_KERNEL);
++	if (!slot->pool_counts) {
++		kfree(slot->rmap_list_pool);
++		return -ENOMEM;
++	}
++
++	slot->pool_size = pool_size;
++	BUG_ON(CAN_OVERFLOW_U64(uksm_pages_total, slot->pages));
++	slot->flags |= UKSM_SLOT_IN_UKSM;
++	uksm_pages_total += slot->pages;
++
++	return 0;
++}
++
++/*
++ * Called after vma is unlinked from its mm
++ */
++void uksm_remove_vma(struct vm_area_struct *vma)
++{
++	struct vma_slot *slot;
++
++	if (!vma->uksm_vma_slot)
++		return;
++
++	spin_lock(&vma_slot_list_lock);
++	slot = vma->uksm_vma_slot;
++	if (!slot)
++		goto out;
++
++	if (slot_in_uksm(slot)) {
++		/**
++		 * This slot has been added by ksmd, so move it to the del list
++		 * and wait for ksmd to free it.
++		 */
++		list_add_tail(&slot->slot_list, &vma_slot_del);
++	} else {
++		/**
++		 * It's still on the new list. It's ok to free the slot directly.
++		 */
++		list_del(&slot->slot_list);
++		free_vma_slot(slot);
++	}
++out:
++	vma->uksm_vma_slot = NULL;
++	spin_unlock(&vma_slot_list_lock);
++}
++
++/**
++ * Need to do two things:
++ * 1. check if the slot was moved to the del list
++ * 2. make sure the mmap_sem is manipulated under a valid vma.
++ *
++ * My concern here is that in some cases, this may cause
++ * vma_slot_list_lock() waiters to be serialized further by some
++ * sem->wait_lock; can this really be expensive?
++ *
++ *
++ * @return
++ * 0: if successfully locked mmap_sem
++ * -ENOENT: this slot was moved to del list
++ * -EBUSY: vma lock failed
++ */
++static int try_down_read_slot_mmap_sem(struct vma_slot *slot)
++{
++	struct vm_area_struct *vma;
++	struct mm_struct *mm;
++	struct rw_semaphore *sem;
++
++	spin_lock(&vma_slot_list_lock);
++
++	/* The slot_list was removed and re-initialized from the new list when it
++	 * entered uksm. If it's not empty now, it must have been moved to the del list.
++	 */
++	if (!slot_in_uksm(slot)) {
++		spin_unlock(&vma_slot_list_lock);
++		return -ENOENT;
++	}
++
++	BUG_ON(slot->pages != vma_pages(slot->vma));
++	/* Ok, vma still valid */
++	vma = slot->vma;
++	mm = vma->vm_mm;
++	sem = &mm->mmap_lock;
++
++	if (uksm_test_exit(mm)) {
++		spin_unlock(&vma_slot_list_lock);
++		return -ENOENT;
++	}
++
++	if (down_read_trylock(sem)) {
++		spin_unlock(&vma_slot_list_lock);
++		if (slot_pool_alloc(slot)) {
++			uksm_remove_vma(vma);
++			up_read(sem);
++			return -ENOENT;
++		}
++		return 0;
++	}
++
++	spin_unlock(&vma_slot_list_lock);
++	return -EBUSY;
++}
++
++static inline unsigned long
++vma_page_address(struct page *page, struct vm_area_struct *vma)
++{
++	pgoff_t pgoff = page->index;
++	unsigned long address;
++
++	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
++		/* page should be within @vma mapping range */
++		return -EFAULT;
++	}
++	return address;
++}
++
++
++/* return 0 on success with the item's mmap_sem locked */
++static inline int get_mergeable_page_lock_mmap(struct rmap_item *item)
++{
++	struct mm_struct *mm;
++	struct vma_slot *slot = item->slot;
++	int err = -EINVAL;
++
++	struct page *page;
++
++	/*
++	 * try_down_read_slot_mmap_sem() returns non-zero if the slot has been
++	 * removed by uksm_remove_vma() or if its mmap_sem cannot be taken now.
++	 */
++	if (try_down_read_slot_mmap_sem(slot))
++		return -EBUSY;
++
++	mm = slot->vma->vm_mm;
++
++	if (uksm_test_exit(mm))
++		goto failout_up;
++
++	page = item->page;
++	rcu_read_lock();
++	if (!get_page_unless_zero(page)) {
++		rcu_read_unlock();
++		goto failout_up;
++	}
++
++	/* No need to consider huge page here. */
++	if (item->slot->vma->anon_vma != page_anon_vma(page) ||
++	    vma_page_address(page, item->slot->vma) != get_rmap_addr(item)) {
++		/*
++		 * TODO:
++		 * should we release this item because of its stale page
++		 * mapping?
++		 */
++		put_page(page);
++		rcu_read_unlock();
++		goto failout_up;
++	}
++	rcu_read_unlock();
++	return 0;
++
++failout_up:
++	mmap_read_unlock(mm);
++	return err;
++}
++
++/*
++ * What kind of VMA is considered ?
++ */
++static inline int vma_can_enter(struct vm_area_struct *vma)
++{
++	return uksm_flags_can_scan(vma->vm_flags);
++}
++
++/*
++ * Called whenever a fresh new vma is created. A new vma_slot
++ * is created and inserted into a global list. Must be called
++ * after the vma is inserted into its mm.
++ */
++void uksm_vma_add_new(struct vm_area_struct *vma)
++{
++	struct vma_slot *slot;
++
++	if (!vma_can_enter(vma)) {
++		vma->uksm_vma_slot = NULL;
++		return;
++	}
++
++	slot = alloc_vma_slot();
++	if (!slot) {
++		vma->uksm_vma_slot = NULL;
++		return;
++	}
++
++	vma->uksm_vma_slot = slot;
++	vma->vm_flags |= VM_MERGEABLE;
++	slot->vma = vma;
++	slot->mm = vma->vm_mm;
++	slot->ctime_j = jiffies;
++	slot->pages = vma_pages(vma);
++	spin_lock(&vma_slot_list_lock);
++	list_add_tail(&slot->slot_list, &vma_slot_new);
++	spin_unlock(&vma_slot_list_lock);
++}
++
++/*   32/3 < they < 32/2 */
++#define shiftl	8
++#define shiftr	12
++
++#define HASH_FROM_TO(from, to)			\
++for (index = from; index < to; index++) {	\
++	pos = random_nums[index];		\
++	hash += key[pos];			\
++	hash += (hash << shiftl);		\
++	hash ^= (hash >> shiftr);		\
++}
++
++
++#define HASH_FROM_DOWN_TO(from, to)		\
++for (index = from - 1; index >= to; index--) {	\
++	hash ^= (hash >> shiftr);		\
++	hash ^= (hash >> (shiftr*2));		\
++	hash -= (hash << shiftl);		\
++	hash += (hash << (shiftl*2));		\
++	pos = random_nums[index];		\
++	hash -= key[pos];			\
++}
++
++/*
++ * The main random sample hash function.
++ */
++static u32 random_sample_hash(void *addr, u32 hash_strength)
++{
++	u32 hash = 0xdeadbeef;
++	int index, pos, loop = hash_strength;
++	u32 *key = (u32 *)addr;
++
++	if (loop > HASH_STRENGTH_FULL)
++		loop = HASH_STRENGTH_FULL;
++
++	HASH_FROM_TO(0, loop);
++
++	if (hash_strength > HASH_STRENGTH_FULL) {
++		loop = hash_strength - HASH_STRENGTH_FULL;
++		HASH_FROM_TO(0, loop);
++	}
++
++	return hash;
++}
++
++
++/**
++ * It's used when hash strength is adjusted
++ *
++ * @addr The page's virtual address
++ * @from The original hash strength
++ * @to   The hash strength changed to
++ * @hash The hash value generated at the "from" hash strength
++ *
++ * return the hash value
++ */
++static u32 delta_hash(void *addr, int from, int to, u32 hash)
++{
++	u32 *key = (u32 *)addr;
++	int index, pos; /* make sure they are int type */
++
++	if (to > from) {
++		if (from >= HASH_STRENGTH_FULL) {
++			from -= HASH_STRENGTH_FULL;
++			to -= HASH_STRENGTH_FULL;
++			HASH_FROM_TO(from, to);
++		} else if (to <= HASH_STRENGTH_FULL) {
++			HASH_FROM_TO(from, to);
++		} else {
++			HASH_FROM_TO(from, HASH_STRENGTH_FULL);
++			HASH_FROM_TO(0, to - HASH_STRENGTH_FULL);
++		}
++	} else {
++		if (from <= HASH_STRENGTH_FULL) {
++			HASH_FROM_DOWN_TO(from, to);
++		} else if (to >= HASH_STRENGTH_FULL) {
++			from -= HASH_STRENGTH_FULL;
++			to -= HASH_STRENGTH_FULL;
++			HASH_FROM_DOWN_TO(from, to);
++		} else {
++			HASH_FROM_DOWN_TO(from - HASH_STRENGTH_FULL, 0);
++			HASH_FROM_DOWN_TO(HASH_STRENGTH_FULL, to);
++		}
++	}
++
++	return hash;
++}
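
The fold in HASH_FROM_TO is purely sequential, which is what makes delta_hash() work: a hash computed at a lower strength can be extended to a higher one without touching the samples already folded in, and HASH_FROM_DOWN_TO undoes the same steps in reverse. A standalone userspace check of the extend-upwards case, reusing the 0xdeadbeef seed and the shiftl/shiftr constants from above; the key contents and random_nums here are made-up test data, and strengths above HASH_STRENGTH_FULL are not covered:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>

#define STRENGTH_FULL	1024	/* a 4096-byte page / sizeof(u32) */
#define shiftl		8
#define shiftr		12

static uint32_t key[STRENGTH_FULL];
static uint32_t random_nums[STRENGTH_FULL];

/* One fold step of HASH_FROM_TO for sample index i. */
static uint32_t fold(uint32_t hash, int i)
{
	hash += key[random_nums[i]];
	hash += hash << shiftl;
	hash ^= hash >> shiftr;
	return hash;
}

/* random_sample_hash() restricted to strengths <= STRENGTH_FULL. */
static uint32_t sample_hash(int strength)
{
	uint32_t hash = 0xdeadbeef;
	int i;

	for (i = 0; i < strength; i++)
		hash = fold(hash, i);
	return hash;
}

/* delta_hash() for the simple "extend upwards" case (from < to <= FULL). */
static uint32_t extend_hash(uint32_t hash, int from, int to)
{
	int i;

	for (i = from; i < to; i++)
		hash = fold(hash, i);
	return hash;
}

int main(void)
{
	int i;

	srand(1);
	for (i = 0; i < STRENGTH_FULL; i++) {
		key[i] = (uint32_t)rand();
		random_nums[i] = (uint32_t)(rand() % STRENGTH_FULL);
	}

	uint32_t h64 = sample_hash(64);
	uint32_t h128_direct = sample_hash(128);
	uint32_t h128_delta = extend_hash(h64, 64, 128);

	assert(h128_direct == h128_delta);
	printf("strength 64: %08x, extended to 128: %08x (matches direct)\n",
	       h64, h128_delta);
	return 0;
}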
++
++/**
++ *
++ * Called when rshash_pos or rshash_neg is about to overflow, or when a scan
++ * round has finished.
++ *
++ * return 0 if no page has been scanned since last call, 1 otherwise.
++ */
++static inline int encode_benefit(void)
++{
++	u64 scanned_delta, pos_delta, neg_delta;
++	unsigned long base = benefit.base;
++
++	scanned_delta = uksm_pages_scanned - uksm_pages_scanned_last;
++
++	if (!scanned_delta)
++		return 0;
++
++	scanned_delta >>= base;
++	pos_delta = rshash_pos >> base;
++	neg_delta = rshash_neg >> base;
++
++	if (CAN_OVERFLOW_U64(benefit.pos, pos_delta) ||
++	    CAN_OVERFLOW_U64(benefit.neg, neg_delta) ||
++	    CAN_OVERFLOW_U64(benefit.scanned, scanned_delta)) {
++		benefit.scanned >>= 1;
++		benefit.neg >>= 1;
++		benefit.pos >>= 1;
++		benefit.base++;
++		scanned_delta >>= 1;
++		pos_delta >>= 1;
++		neg_delta >>= 1;
++	}
++
++	benefit.pos += pos_delta;
++	benefit.neg += neg_delta;
++	benefit.scanned += scanned_delta;
++
++	BUG_ON(!benefit.scanned);
++
++	rshash_pos = rshash_neg = 0;
++	uksm_pages_scanned_last = uksm_pages_scanned;
++
++	return 1;
++}
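
encode_benefit() avoids 64-bit overflow by halving pos, neg and scanned and bumping base whenever an addition would overflow; since the benefit is presumably consumed as a ratio of these counters elsewhere in the patch, the shared 2^base factor drops out. A tiny numeric illustration of that rescaling:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Pretend these are benefit.pos / benefit.neg before a rescale. */
	uint64_t pos = 6000000000ULL, neg = 2000000000ULL;
	unsigned long base = 0;

	printf("ratio before: %.3f (base %lu)\n",
	       (double)pos / (double)(pos + neg), base);

	/* The overflow path: halve the stored values, remember the shift. */
	pos >>= 1;
	neg >>= 1;
	base++;

	/* Any later ratio of the counters is unchanged by the rescale. */
	printf("ratio after:  %.3f (base %lu)\n",
	       (double)pos / (double)(pos + neg), base);
	return 0;
}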
++
++static inline void reset_benefit(void)
++{
++	benefit.pos = 0;
++	benefit.neg = 0;
++	benefit.base = 0;
++	benefit.scanned = 0;
++}
++
++static inline void inc_rshash_pos(unsigned long delta)
++{
++	if (CAN_OVERFLOW_U64(rshash_pos, delta))
++		encode_benefit();
++
++	rshash_pos += delta;
++}
++
++static inline void inc_rshash_neg(unsigned long delta)
++{
++	if (CAN_OVERFLOW_U64(rshash_neg, delta))
++		encode_benefit();
++
++	rshash_neg += delta;
++}
++
++
++static inline u32 page_hash(struct page *page, unsigned long hash_strength,
++			    int cost_accounting)
++{
++	u32 val;
++	unsigned long delta;
++
++	void *addr = kmap_atomic(page);
++
++	val = random_sample_hash(addr, hash_strength);
++	kunmap_atomic(addr);
++
++	if (cost_accounting) {
++		if (hash_strength < HASH_STRENGTH_FULL)
++			delta = HASH_STRENGTH_FULL - hash_strength;
++		else
++			delta = 0;
++
++		inc_rshash_pos(delta);
++	}
++
++	return val;
++}
++
++static int memcmp_pages_with_cost(struct page *page1, struct page *page2,
++			int cost_accounting)
++{
++	char *addr1, *addr2;
++	int ret;
++
++	addr1 = kmap_atomic(page1);
++	addr2 = kmap_atomic(page2);
++	ret = memcmp(addr1, addr2, PAGE_SIZE);
++	kunmap_atomic(addr2);
++	kunmap_atomic(addr1);
++
++	if (cost_accounting)
++		inc_rshash_neg(memcmp_cost);
++
++	return ret;
++}
++
++static inline int pages_identical_with_cost(struct page *page1, struct page *page2)
++{
++	return !memcmp_pages_with_cost(page1, page2, 0);
++}
++
++static inline int is_page_full_zero(struct page *page)
++{
++	char *addr;
++	int ret;
++
++	addr = kmap_atomic(page);
++	ret = is_full_zero(addr, PAGE_SIZE);
++	kunmap_atomic(addr);
++
++	return ret;
++}
++
++static int write_protect_page(struct vm_area_struct *vma, struct page *page,
++			      pte_t *orig_pte, pte_t *old_pte)
++{
++	struct mm_struct *mm = vma->vm_mm;
++	struct page_vma_mapped_walk pvmw = {
++		.page = page,
++		.vma = vma,
++	};
++	struct mmu_notifier_range range;
++	int swapped;
++	int err = -EFAULT;
++
++	pvmw.address = page_address_in_vma(page, vma);
++	if (pvmw.address == -EFAULT)
++		goto out;
++
++	BUG_ON(PageTransCompound(page));
++
++	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
++				pvmw.address, pvmw.address + PAGE_SIZE);
++	mmu_notifier_invalidate_range_start(&range);
++
++	if (!page_vma_mapped_walk(&pvmw))
++		goto out_mn;
++	if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
++		goto out_unlock;
++
++	if (old_pte)
++		*old_pte = *pvmw.pte;
++
++	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
++	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) || mm_tlb_flush_pending(mm)) {
++		pte_t entry;
++
++		swapped = PageSwapCache(page);
++		flush_cache_page(vma, pvmw.address, page_to_pfn(page));
++		/*
++		 * Ok this is tricky, when get_user_pages_fast() runs it doesn't
++		 * take any lock, therefore the check that we are going to make
++		 * with the pagecount against the mapcount is racy and
++		 * O_DIRECT can happen right after the check.
++		 * So we clear the pte and flush the tlb before the check;
++		 * this assures us that no O_DIRECT can happen after the check
++		 * or in the middle of the check.
++		 */
++		entry = ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
++		/*
++		 * Check that no O_DIRECT or similar I/O is in progress on the
++		 * page
++		 */
++		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
++			set_pte_at(mm, pvmw.address, pvmw.pte, entry);
++			goto out_unlock;
++		}
++		if (pte_dirty(entry))
++			set_page_dirty(page);
++
++		if (pte_protnone(entry))
++			entry = pte_mkclean(pte_clear_savedwrite(entry));
++		else
++			entry = pte_mkclean(pte_wrprotect(entry));
++
++		set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
++	}
++	*orig_pte = *pvmw.pte;
++	err = 0;
++
++out_unlock:
++	page_vma_mapped_walk_done(&pvmw);
++out_mn:
++	mmu_notifier_invalidate_range_end(&range);
++out:
++	return err;
++}
++
++#define MERGE_ERR_PGERR		1 /* the page is invalid, cannot continue */
++#define MERGE_ERR_COLLI		2 /* there is a collision */
++#define MERGE_ERR_COLLI_MAX	3 /* collision at the max hash strength */
++#define MERGE_ERR_CHANGED	4 /* the page has changed since last hash */
++
++
++/**
++ * replace_page - replace page in vma by new ksm page
++ * @vma:      vma that holds the pte pointing to page
++ * @page:     the page we are replacing by kpage
++ * @kpage:    the ksm page we replace page by
++ * @orig_pte: the original value of the pte
++ *
++ * Returns 0 on success, MERGE_ERR_PGERR on failure.
++ */
++static int replace_page(struct vm_area_struct *vma, struct page *page,
++			struct page *kpage, pte_t orig_pte)
++{
++	struct mm_struct *mm = vma->vm_mm;
++	struct mmu_notifier_range range;
++	pgd_t *pgd;
++	p4d_t *p4d;
++	pud_t *pud;
++	pmd_t *pmd;
++	pte_t *ptep;
++	spinlock_t *ptl;
++	pte_t entry;
++
++	unsigned long addr;
++	int err = MERGE_ERR_PGERR;
++
++	addr = page_address_in_vma(page, vma);
++	if (addr == -EFAULT)
++		goto out;
++
++	pgd = pgd_offset(mm, addr);
++	if (!pgd_present(*pgd))
++		goto out;
++
++	p4d = p4d_offset(pgd, addr);
++	pud = pud_offset(p4d, addr);
++	if (!pud_present(*pud))
++		goto out;
++
++	pmd = pmd_offset(pud, addr);
++	BUG_ON(pmd_trans_huge(*pmd));
++	if (!pmd_present(*pmd))
++		goto out;
++
++	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
++				addr + PAGE_SIZE);
++	mmu_notifier_invalidate_range_start(&range);
++
++	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
++	if (!pte_same(*ptep, orig_pte)) {
++		pte_unmap_unlock(ptep, ptl);
++		goto out_mn;
++	}
++
++	flush_cache_page(vma, addr, pte_pfn(*ptep));
++	ptep_clear_flush_notify(vma, addr, ptep);
++	entry = mk_pte(kpage, vma->vm_page_prot);
++
++	/* special treatment is needed for zero_page */
++	if ((page_to_pfn(kpage) == uksm_zero_pfn) ||
++				(page_to_pfn(kpage) == zero_pfn)) {
++		entry = pte_mkspecial(entry);
++		dec_mm_counter(mm, MM_ANONPAGES);
++		inc_zone_page_state(page, NR_UKSM_ZERO_PAGES);
++	} else {
++		get_page(kpage);
++		page_add_anon_rmap(kpage, vma, addr, false);
++	}
++
++	set_pte_at_notify(mm, addr, ptep, entry);
++
++	page_remove_rmap(page, false);
++	if (!page_mapped(page))
++		try_to_free_swap(page);
++	put_page(page);
++
++	pte_unmap_unlock(ptep, ptl);
++	err = 0;
++out_mn:
++	mmu_notifier_invalidate_range_end(&range);
++out:
++	return err;
++}
++
++
++/**
++ *  Fully hash a page with HASH_STRENGTH_MAX and return a non-zero hash value.
++ *  A zero hash value at HASH_STRENGTH_MAX is used to indicate that the
++ *  hash_max member has not been calculated yet.
++ *
++ * @page The page that needs to be hashed
++ * @hash_old The hash value calculated with the current hash strength
++ *
++ * return the new hash value calculated at HASH_STRENGTH_MAX
++ */
++static inline u32 page_hash_max(struct page *page, u32 hash_old)
++{
++	u32 hash_max = 0;
++	void *addr;
++
++	addr = kmap_atomic(page);
++	hash_max = delta_hash(addr, hash_strength,
++			      HASH_STRENGTH_MAX, hash_old);
++
++	kunmap_atomic(addr);
++
++	if (!hash_max)
++		hash_max = 1;
++
++	inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength);
++	return hash_max;
++}
++
++/*
++ * We compare the hash again, to ensure that it is really a hash collision
++ * instead of being caused by a page write.
++ */
++static inline int check_collision(struct rmap_item *rmap_item,
++				  u32 hash)
++{
++	int err;
++	struct page *page = rmap_item->page;
++
++	/* If this rmap_item has already been hash_maxed, then the collision
++	 * must appear in the second-level rbtree search. In this case we check
++	 * if its hash_max value has been changed. Otherwise, the collision
++	 * happened in the first-level rbtree search, so we check against its
++	 * current hash value.
++	 */
++	if (rmap_item->hash_max) {
++		inc_rshash_neg(memcmp_cost);
++		inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength);
++
++		if (rmap_item->hash_max == page_hash_max(page, hash))
++			err = MERGE_ERR_COLLI;
++		else
++			err = MERGE_ERR_CHANGED;
++	} else {
++		inc_rshash_neg(memcmp_cost + hash_strength);
++
++		if (page_hash(page, hash_strength, 0) == hash)
++			err = MERGE_ERR_COLLI;
++		else
++			err = MERGE_ERR_CHANGED;
++	}
++
++	return err;
++}
++
++/**
++ * Try to merge a rmap_item.page with a kpage in stable node. kpage must
++ * already be a ksm page.
++ *
++ * @return 0 if the pages were merged, -EFAULT otherwise.
++ */
++static int try_to_merge_with_uksm_page(struct rmap_item *rmap_item,
++				      struct page *kpage, u32 hash)
++{
++	struct vm_area_struct *vma = rmap_item->slot->vma;
++	struct mm_struct *mm = vma->vm_mm;
++	pte_t orig_pte = __pte(0);
++	int err = MERGE_ERR_PGERR;
++	struct page *page;
++
++	if (uksm_test_exit(mm))
++		goto out;
++
++	page = rmap_item->page;
++
++	if (page == kpage) { /* ksm page forked */
++		err = 0;
++		goto out;
++	}
++
++	/*
++	 * We need the page lock to read a stable PageSwapCache in
++	 * write_protect_page().  We use trylock_page() instead of
++	 * lock_page() because we don't want to wait here - we
++	 * prefer to continue scanning and merging different pages,
++	 * then come back to this page when it is unlocked.
++	 */
++	if (!trylock_page(page))
++		goto out;
++
++	if (!PageAnon(page) || !PageKsm(kpage))
++		goto out_unlock;
++
++	if (PageTransCompound(page)) {
++		err = split_huge_page(page);
++		if (err)
++			goto out_unlock;
++	}
++
++	/*
++	 * If this anonymous page is mapped only here, its pte may need
++	 * to be write-protected.  If it's mapped elsewhere, all of its
++	 * ptes are necessarily already write-protected.  But in either
++	 * case, we need to lock and check page_count is not raised.
++	 */
++	if (write_protect_page(vma, page, &orig_pte, NULL) == 0) {
++		if (pages_identical_with_cost(page, kpage))
++			err = replace_page(vma, page, kpage, orig_pte);
++		else
++			err = check_collision(rmap_item, hash);
++	}
++
++	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
++		munlock_vma_page(page);
++		if (!PageMlocked(kpage)) {
++			unlock_page(page);
++			lock_page(kpage);
++			mlock_vma_page(kpage);
++			page = kpage;		/* for final unlock */
++		}
++	}
++
++out_unlock:
++	unlock_page(page);
++out:
++	return err;
++}
++
++
++
++/**
++ * If two pages fail to merge in try_to_merge_two_pages, then we have a chance
++ * to restore a page mapping that has been changed in try_to_merge_two_pages.
++ *
++ * @return 0 on success.
++ */
++static int restore_uksm_page_pte(struct vm_area_struct *vma, unsigned long addr,
++			     pte_t orig_pte, pte_t wprt_pte)
++{
++	struct mm_struct *mm = vma->vm_mm;
++	pgd_t *pgd;
++	p4d_t *p4d;
++	pud_t *pud;
++	pmd_t *pmd;
++	pte_t *ptep;
++	spinlock_t *ptl;
++
++	int err = -EFAULT;
++
++	pgd = pgd_offset(mm, addr);
++	if (!pgd_present(*pgd))
++		goto out;
++
++	p4d = p4d_offset(pgd, addr);
++	pud = pud_offset(p4d, addr);
++	if (!pud_present(*pud))
++		goto out;
++
++	pmd = pmd_offset(pud, addr);
++	if (!pmd_present(*pmd))
++		goto out;
++
++	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
++	if (!pte_same(*ptep, wprt_pte)) {
++		/* already copied, let it be */
++		pte_unmap_unlock(ptep, ptl);
++		goto out;
++	}
++
++	/*
++	 * Good, still here. While we still hold the ksm page, it does not
++	 * return to the free page pool, so there is no way a pte could have
++	 * been changed to another page and back to this page. Also remember
++	 * that ksm pages are not reused in do_wp_page(). So it's safe to
++	 * restore the original pte.
++	 */
++	flush_cache_page(vma, addr, pte_pfn(*ptep));
++	ptep_clear_flush_notify(vma, addr, ptep);
++	set_pte_at_notify(mm, addr, ptep, orig_pte);
++
++	pte_unmap_unlock(ptep, ptl);
++	err = 0;
++out:
++	return err;
++}
++
++/**
++ * try_to_merge_two_pages() - take two identical pages and prepare
++ * them to be merged into one page (rmap_item->page)
++ *
++ * @return 0 if we successfully merged two identical pages into
++ *         one ksm page. MERGE_ERR_COLLI if it was only a hash collision
++ *         during the rbtree search. MERGE_ERR_CHANGED if rmap_item has been
++ *         changed since it was hashed. MERGE_ERR_PGERR otherwise.
++ *
++ */
++static int try_to_merge_two_pages(struct rmap_item *rmap_item,
++				  struct rmap_item *tree_rmap_item,
++				  u32 hash)
++{
++	pte_t orig_pte1 = __pte(0), orig_pte2 = __pte(0);
++	pte_t wprt_pte1 = __pte(0), wprt_pte2 = __pte(0);
++	struct vm_area_struct *vma1 = rmap_item->slot->vma;
++	struct vm_area_struct *vma2 = tree_rmap_item->slot->vma;
++	struct page *page = rmap_item->page;
++	struct page *tree_page = tree_rmap_item->page;
++	int err = MERGE_ERR_PGERR;
++	struct address_space *saved_mapping;
++
++
++	if (rmap_item->page == tree_rmap_item->page)
++		goto out;
++
++	if (!trylock_page(page))
++		goto out;
++
++	if (!PageAnon(page))
++		goto out_unlock;
++
++	if (PageTransCompound(page)) {
++		err = split_huge_page(page);
++		if (err)
++			goto out_unlock;
++	}
++
++	if (write_protect_page(vma1, page, &wprt_pte1, &orig_pte1) != 0) {
++		unlock_page(page);
++		goto out;
++	}
++
++	/*
++	 * While we hold page lock, upgrade page from
++	 * PageAnon+anon_vma to PageKsm+NULL stable_node:
++	 * stable_tree_insert() will update stable_node.
++	 */
++	saved_mapping = page->mapping;
++	set_page_stable_node(page, NULL);
++	mark_page_accessed(page);
++	if (!PageDirty(page))
++		SetPageDirty(page);
++
++	unlock_page(page);
++
++	if (!trylock_page(tree_page))
++		goto restore_out;
++
++	if (!PageAnon(tree_page)) {
++		unlock_page(tree_page);
++		goto restore_out;
++	}
++
++	if (PageTransCompound(tree_page)) {
++		err = split_huge_page(tree_page);
++		if (err) {
++			unlock_page(tree_page);
++			goto restore_out;
++		}
++	}
++
++	if (write_protect_page(vma2, tree_page, &wprt_pte2, &orig_pte2) != 0) {
++		unlock_page(tree_page);
++		goto restore_out;
++	}
++
++	if (pages_identical_with_cost(page, tree_page)) {
++		err = replace_page(vma2, tree_page, page, wprt_pte2);
++		if (err) {
++			unlock_page(tree_page);
++			goto restore_out;
++		}
++
++		if ((vma2->vm_flags & VM_LOCKED)) {
++			munlock_vma_page(tree_page);
++			if (!PageMlocked(page)) {
++				unlock_page(tree_page);
++				lock_page(page);
++				mlock_vma_page(page);
++				tree_page = page; /* for final unlock */
++			}
++		}
++
++		unlock_page(tree_page);
++
++		goto out; /* success */
++
++	} else {
++		if (tree_rmap_item->hash_max &&
++		    tree_rmap_item->hash_max == rmap_item->hash_max) {
++			err = MERGE_ERR_COLLI_MAX;
++		} else if (page_hash(page, hash_strength, 0) ==
++		    page_hash(tree_page, hash_strength, 0)) {
++			inc_rshash_neg(memcmp_cost + hash_strength * 2);
++			err = MERGE_ERR_COLLI;
++		} else {
++			err = MERGE_ERR_CHANGED;
++		}
++
++		unlock_page(tree_page);
++	}
++
++restore_out:
++	lock_page(page);
++	if (!restore_uksm_page_pte(vma1, get_rmap_addr(rmap_item),
++				  orig_pte1, wprt_pte1))
++		page->mapping = saved_mapping;
++
++out_unlock:
++	unlock_page(page);
++out:
++	return err;
++}
++
++static inline int hash_cmp(u32 new_val, u32 node_val)
++{
++	if (new_val > node_val)
++		return 1;
++	else if (new_val < node_val)
++		return -1;
++	else
++		return 0;
++}
++
++static inline u32 rmap_item_hash_max(struct rmap_item *item, u32 hash)
++{
++	u32 hash_max = item->hash_max;
++
++	if (!hash_max) {
++		hash_max = page_hash_max(item->page, hash);
++
++		item->hash_max = hash_max;
++	}
++
++	return hash_max;
++}
++
++
++
++/**
++ * stable_tree_search() - search the stable tree for a page
++ *
++ * @item:	the rmap_item we are comparing with
++ * @hash:	the hash value of this item->page already calculated
++ *
++ * @return	the page we have found, NULL otherwise. The returned page has
++ *			had its refcount raised with get_page().
++ */
++static struct page *stable_tree_search(struct rmap_item *item, u32 hash)
++{
++	struct rb_node *node = root_stable_treep->rb_node;
++	struct tree_node *tree_node;
++	unsigned long hash_max;
++	struct page *page = item->page;
++	struct stable_node *stable_node;
++
++	stable_node = page_stable_node(page);
++	if (stable_node) {
++		/* ksm page forked, that is
++		 * if (PageKsm(page) && !in_stable_tree(rmap_item))
++		 * it's actually gotten once outside.
++		 */
++		get_page(page);
++		return page;
++	}
++
++	while (node) {
++		int cmp;
++
++		tree_node = rb_entry(node, struct tree_node, node);
++
++		cmp = hash_cmp(hash, tree_node->hash);
++
++		if (cmp < 0)
++			node = node->rb_left;
++		else if (cmp > 0)
++			node = node->rb_right;
++		else
++			break;
++	}
++
++	if (!node)
++		return NULL;
++
++	if (tree_node->count == 1) {
++		stable_node = rb_entry(tree_node->sub_root.rb_node,
++				       struct stable_node, node);
++		BUG_ON(!stable_node);
++
++		goto get_page_out;
++	}
++
++	/*
++	 * ok, we have to search the second
++	 * level subtree, hash the page to a
++	 * full strength.
++	 */
++	node = tree_node->sub_root.rb_node;
++	BUG_ON(!node);
++	hash_max = rmap_item_hash_max(item, hash);
++
++	while (node) {
++		int cmp;
++
++		stable_node = rb_entry(node, struct stable_node, node);
++
++		cmp = hash_cmp(hash_max, stable_node->hash_max);
++
++		if (cmp < 0)
++			node = node->rb_left;
++		else if (cmp > 0)
++			node = node->rb_right;
++		else
++			goto get_page_out;
++	}
++
++	return NULL;
++
++get_page_out:
++	page = get_uksm_page(stable_node, 1, 1);
++	return page;
++}
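++/*
++ * Layout sketch for the two-level lookup above (hypothetical hash values):
++ * the top rb-tree is keyed by the partial-strength hash, and each
++ * tree_node's sub-tree is keyed by the full-strength hash_max, e.g.:
++ *
++ *   root_stable_treep
++ *     tree_node (hash = 0x1234, count = 2)
++ *       stable_node (hash_max = 0x3ac90001)
++ *       stable_node (hash_max = 0x91f00002)
++ *     tree_node (hash = 0x8000, count = 1)
++ *       stable_node (hash_max may still be 0; it is computed lazily)
++ *
++ * hash_max is only computed via rmap_item_hash_max() when a collision on
++ * the partial hash forces a descent into a sub-tree.
++ */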
++
++static int try_merge_rmap_item(struct rmap_item *item,
++			       struct page *kpage,
++			       struct page *tree_page)
++{
++	struct vm_area_struct *vma = item->slot->vma;
++	struct page_vma_mapped_walk pvmw = {
++		.page = kpage,
++		.vma = vma,
++	};
++
++	pvmw.address = get_rmap_addr(item);
++	if (!page_vma_mapped_walk(&pvmw))
++		return 0;
++
++	if (pte_write(*pvmw.pte)) {
++		/* has changed, abort! */
++		page_vma_mapped_walk_done(&pvmw);
++		return 0;
++	}
++
++	get_page(tree_page);
++	page_add_anon_rmap(tree_page, vma, pvmw.address, false);
++
++	flush_cache_page(vma, pvmw.address, page_to_pfn(kpage));
++	ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
++	set_pte_at_notify(vma->vm_mm, pvmw.address, pvmw.pte,
++			  mk_pte(tree_page, vma->vm_page_prot));
++
++	page_remove_rmap(kpage, false);
++	put_page(kpage);
++
++	page_vma_mapped_walk_done(&pvmw);
++
++	return 1;
++}
++
++/**
++ * try_merge_with_stable() - when two rmap_items need to be inserted into the
++ * stable tree and the page was found to be identical to a stable ksm page,
++ * this is the last chance to merge them into one.
++ *
++ * @item1:	the rmap_item holding the page which we wanted to insert
++ *		into the stable tree.
++ * @item2:	the other rmap_item we found during the unstable tree search
++ * @kpage:	the page currently mapped by the two rmap_items
++ * @tree_page:	the page we found identical in the stable tree node
++ * @success1:	returns whether item1 was successfully merged
++ * @success2:	returns whether item2 was successfully merged
++ */
++static void try_merge_with_stable(struct rmap_item *item1,
++				  struct rmap_item *item2,
++				  struct page **kpage,
++				  struct page *tree_page,
++				  int *success1, int *success2)
++{
++	struct vm_area_struct *vma1 = item1->slot->vma;
++	struct vm_area_struct *vma2 = item2->slot->vma;
++	*success1 = 0;
++	*success2 = 0;
++
++	if (unlikely(*kpage == tree_page)) {
++		/* I don't think this can really happen */
++		pr_warn("UKSM: unexpected condition detected in "
++			"%s -- *kpage == tree_page !\n", __func__);
++		*success1 = 1;
++		*success2 = 1;
++		return;
++	}
++
++	if (!PageAnon(*kpage) || !PageKsm(*kpage))
++		goto failed;
++
++	if (!trylock_page(tree_page))
++		goto failed;
++
++	/* If the old page is still ksm, still pointed
++	 * to in the right place, and still write protected,
++	 * we are confident it has not changed, so no further
++	 * memcmp is needed.
++	 * Beware: we cannot take nested pte locks --
++	 * deadlock risk.
++	 */
++	if (!try_merge_rmap_item(item1, *kpage, tree_page))
++		goto unlock_failed;
++
++	/* ok, now vma2; remember that pte1 is already set */
++	if (!try_merge_rmap_item(item2, *kpage, tree_page))
++		goto success_1;
++
++	*success2 = 1;
++success_1:
++	*success1 = 1;
++
++
++	if ((*success1 && vma1->vm_flags & VM_LOCKED) ||
++	    (*success2 && vma2->vm_flags & VM_LOCKED)) {
++		munlock_vma_page(*kpage);
++		if (!PageMlocked(tree_page))
++			mlock_vma_page(tree_page);
++	}
++
++	/*
++	 * We do not need the old page any more in the caller, so we can
++	 * release its lock now.
++	 */
++	unlock_page(*kpage);
++	*kpage = tree_page; /* Get unlocked outside. */
++	return;
++
++unlock_failed:
++	unlock_page(tree_page);
++failed:
++	return;
++}
++
++static inline void stable_node_hash_max(struct stable_node *node,
++					 struct page *page, u32 hash)
++{
++	u32 hash_max = node->hash_max;
++
++	if (!hash_max) {
++		hash_max = page_hash_max(page, hash);
++		node->hash_max = hash_max;
++	}
++}
++
++static inline
++struct stable_node *new_stable_node(struct tree_node *tree_node,
++				    struct page *kpage, u32 hash_max)
++{
++	struct stable_node *new_stable_node;
++
++	new_stable_node = alloc_stable_node();
++	if (!new_stable_node)
++		return NULL;
++
++	new_stable_node->kpfn = page_to_pfn(kpage);
++	new_stable_node->hash_max = hash_max;
++	new_stable_node->tree_node = tree_node;
++	set_page_stable_node(kpage, new_stable_node);
++
++	return new_stable_node;
++}
++
++static inline
++struct stable_node *first_level_insert(struct tree_node *tree_node,
++				       struct rmap_item *rmap_item,
++				       struct rmap_item *tree_rmap_item,
++				       struct page **kpage, u32 hash,
++				       int *success1, int *success2)
++{
++	int cmp;
++	struct page *tree_page;
++	u32 hash_max = 0;
++	struct stable_node *stable_node, *new_snode;
++	struct rb_node *parent = NULL, **new;
++
++	/* this tree node contains no sub-tree yet */
++	stable_node = rb_entry(tree_node->sub_root.rb_node,
++			       struct stable_node, node);
++
++	tree_page = get_uksm_page(stable_node, 1, 0);
++	if (tree_page) {
++		cmp = memcmp_pages_with_cost(*kpage, tree_page, 1);
++		if (!cmp) {
++			try_merge_with_stable(rmap_item, tree_rmap_item, kpage,
++					      tree_page, success1, success2);
++			put_page(tree_page);
++			if (!*success1 && !*success2)
++				goto failed;
++
++			return stable_node;
++
++		} else {
++			/*
++			 * Collision at the first level; try to create a
++			 * subtree. A new node needs to be created.
++			 */
++			put_page(tree_page);
++
++			stable_node_hash_max(stable_node, tree_page,
++					     tree_node->hash);
++			hash_max = rmap_item_hash_max(rmap_item, hash);
++			cmp = hash_cmp(hash_max, stable_node->hash_max);
++
++			parent = &stable_node->node;
++			if (cmp < 0)
++				new = &parent->rb_left;
++			else if (cmp > 0)
++				new = &parent->rb_right;
++			else
++				goto failed;
++		}
++
++	} else {
++		/* The only stable_node was deleted; we reuse its tree_node.
++		 */
++		parent = NULL;
++		new = &tree_node->sub_root.rb_node;
++	}
++
++	new_snode = new_stable_node(tree_node, *kpage, hash_max);
++	if (!new_snode)
++		goto failed;
++
++	rb_link_node(&new_snode->node, parent, new);
++	rb_insert_color(&new_snode->node, &tree_node->sub_root);
++	tree_node->count++;
++	*success1 = *success2 = 1;
++
++	return new_snode;
++
++failed:
++	return NULL;
++}
++
++static inline
++struct stable_node *stable_subtree_insert(struct tree_node *tree_node,
++					  struct rmap_item *rmap_item,
++					  struct rmap_item *tree_rmap_item,
++					  struct page **kpage, u32 hash,
++					  int *success1, int *success2)
++{
++	struct page *tree_page;
++	u32 hash_max;
++	struct stable_node *stable_node, *new_snode;
++	struct rb_node *parent, **new;
++
++research:
++	parent = NULL;
++	new = &tree_node->sub_root.rb_node;
++	BUG_ON(!*new);
++	hash_max = rmap_item_hash_max(rmap_item, hash);
++	while (*new) {
++		int cmp;
++
++		stable_node = rb_entry(*new, struct stable_node, node);
++
++		cmp = hash_cmp(hash_max, stable_node->hash_max);
++
++		if (cmp < 0) {
++			parent = *new;
++			new = &parent->rb_left;
++		} else if (cmp > 0) {
++			parent = *new;
++			new = &parent->rb_right;
++		} else {
++			tree_page = get_uksm_page(stable_node, 1, 0);
++			if (tree_page) {
++				cmp = memcmp_pages_with_cost(*kpage, tree_page, 1);
++				if (!cmp) {
++					try_merge_with_stable(rmap_item,
++						tree_rmap_item, kpage,
++						tree_page, success1, success2);
++
++					put_page(tree_page);
++					if (!*success1 && !*success2)
++						goto failed;
++					/*
++					 * successfully merged with a stable
++					 * node
++					 */
++					return stable_node;
++				} else {
++					put_page(tree_page);
++					goto failed;
++				}
++			} else {
++				/*
++				 * The stable node may have been
++				 * deleted and the subtree
++				 * restructured; we cannot
++				 * continue, so re-search it.
++				 */
++				if (tree_node->count) {
++					goto research;
++				} else {
++					/* reuse the tree node */
++					parent = NULL;
++					new = &tree_node->sub_root.rb_node;
++				}
++			}
++		}
++	}
++
++	new_snode = new_stable_node(tree_node, *kpage, hash_max);
++	if (!new_snode)
++		goto failed;
++
++	rb_link_node(&new_snode->node, parent, new);
++	rb_insert_color(&new_snode->node, &tree_node->sub_root);
++	tree_node->count++;
++	*success1 = *success2 = 1;
++
++	return new_snode;
++
++failed:
++	return NULL;
++}
++
++
++/**
++ * stable_tree_insert() - try to insert a page merged in the unstable tree
++ * into the stable tree
++ *
++ * @kpage:		the page that needs to be inserted
++ * @hash:		the current hash of this page
++ * @rmap_item:		the rmap_item being scanned
++ * @tree_rmap_item:	the rmap_item found on the unstable tree
++ * @success1:		returns whether rmap_item was merged
++ * @success2:		returns whether tree_rmap_item was merged
++ *
++ * @return		the stable_node on stable tree if at least one
++ *			rmap_item is inserted into stable tree, NULL
++ *			otherwise.
++ */
++static struct stable_node *
++stable_tree_insert(struct page **kpage, u32 hash,
++		   struct rmap_item *rmap_item,
++		   struct rmap_item *tree_rmap_item,
++		   int *success1, int *success2)
++{
++	struct rb_node **new = &root_stable_treep->rb_node;
++	struct rb_node *parent = NULL;
++	struct stable_node *stable_node;
++	struct tree_node *tree_node;
++	u32 hash_max = 0;
++
++	*success1 = *success2 = 0;
++
++	while (*new) {
++		int cmp;
++
++		tree_node = rb_entry(*new, struct tree_node, node);
++
++		cmp = hash_cmp(hash, tree_node->hash);
++
++		if (cmp < 0) {
++			parent = *new;
++			new = &parent->rb_left;
++		} else if (cmp > 0) {
++			parent = *new;
++			new = &parent->rb_right;
++		} else
++			break;
++	}
++
++	if (*new) {
++		if (tree_node->count == 1) {
++			stable_node = first_level_insert(tree_node, rmap_item,
++						tree_rmap_item, kpage,
++						hash, success1, success2);
++		} else {
++			stable_node = stable_subtree_insert(tree_node,
++					rmap_item, tree_rmap_item, kpage,
++					hash, success1, success2);
++		}
++	} else {
++
++		/* no tree node found */
++		tree_node = alloc_tree_node(stable_tree_node_listp);
++		if (!tree_node) {
++			stable_node = NULL;
++			goto out;
++		}
++
++		stable_node = new_stable_node(tree_node, *kpage, hash_max);
++		if (!stable_node) {
++			free_tree_node(tree_node);
++			goto out;
++		}
++
++		tree_node->hash = hash;
++		rb_link_node(&tree_node->node, parent, new);
++		rb_insert_color(&tree_node->node, root_stable_treep);
++		parent = NULL;
++		new = &tree_node->sub_root.rb_node;
++
++		rb_link_node(&stable_node->node, parent, new);
++		rb_insert_color(&stable_node->node, &tree_node->sub_root);
++		tree_node->count++;
++		*success1 = *success2 = 1;
++	}
++
++out:
++	return stable_node;
++}
++
++
++/**
++ * get_tree_rmap_item_page() - try to get the page and lock the mmap_sem
++ *
++ * @return	0 on success, -EBUSY if unable to lock the mmap_sem,
++ *		-EINVAL if the page mapping has been changed.
++ */
++static inline int get_tree_rmap_item_page(struct rmap_item *tree_rmap_item)
++{
++	int err;
++
++	err = get_mergeable_page_lock_mmap(tree_rmap_item);
++
++	if (err == -EINVAL) {
++		/* its page mapping has changed, remove it */
++		remove_rmap_item_from_tree(tree_rmap_item);
++	}
++
++	/* On success, the page has been gotten and mmap_sem is locked. */
++	return err;
++}
++
++
++/**
++ * unstable_tree_search_insert() - search the unstable tree for an rmap_item
++ * with the same hash value; get its page and trylock the mmap_sem.
++ */
++static inline
++struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
++					      u32 hash)
++
++{
++	struct rb_node **new = &root_unstable_tree.rb_node;
++	struct rb_node *parent = NULL;
++	struct tree_node *tree_node;
++	u32 hash_max;
++	struct rmap_item *tree_rmap_item;
++
++	while (*new) {
++		int cmp;
++
++		tree_node = rb_entry(*new, struct tree_node, node);
++
++		cmp = hash_cmp(hash, tree_node->hash);
++
++		if (cmp < 0) {
++			parent = *new;
++			new = &parent->rb_left;
++		} else if (cmp > 0) {
++			parent = *new;
++			new = &parent->rb_right;
++		} else
++			break;
++	}
++
++	if (*new) {
++		/* got the tree_node */
++		if (tree_node->count == 1) {
++			tree_rmap_item = rb_entry(tree_node->sub_root.rb_node,
++						  struct rmap_item, node);
++			BUG_ON(!tree_rmap_item);
++
++			goto get_page_out;
++		}
++
++		/* well, search the collision subtree */
++		new = &tree_node->sub_root.rb_node;
++		BUG_ON(!*new);
++		hash_max = rmap_item_hash_max(rmap_item, hash);
++
++		while (*new) {
++			int cmp;
++
++			tree_rmap_item = rb_entry(*new, struct rmap_item,
++						  node);
++
++			cmp = hash_cmp(hash_max, tree_rmap_item->hash_max);
++			parent = *new;
++			if (cmp < 0)
++				new = &parent->rb_left;
++			else if (cmp > 0)
++				new = &parent->rb_right;
++			else
++				goto get_page_out;
++		}
++	} else {
++		/* alloc a new tree_node */
++		tree_node = alloc_tree_node(&unstable_tree_node_list);
++		if (!tree_node)
++			return NULL;
++
++		tree_node->hash = hash;
++		rb_link_node(&tree_node->node, parent, new);
++		rb_insert_color(&tree_node->node, &root_unstable_tree);
++		parent = NULL;
++		new = &tree_node->sub_root.rb_node;
++	}
++
++	/* not found even in the sub-tree */
++	rmap_item->tree_node = tree_node;
++	rmap_item->address |= UNSTABLE_FLAG;
++	rmap_item->hash_round = uksm_hash_round;
++	rb_link_node(&rmap_item->node, parent, new);
++	rb_insert_color(&rmap_item->node, &tree_node->sub_root);
++
++	uksm_pages_unshared++;
++	return NULL;
++
++get_page_out:
++	if (tree_rmap_item->page == rmap_item->page)
++		return NULL;
++
++	if (get_tree_rmap_item_page(tree_rmap_item))
++		return NULL;
++
++	return tree_rmap_item;
++}
++
++static void hold_anon_vma(struct rmap_item *rmap_item,
++			  struct anon_vma *anon_vma)
++{
++	rmap_item->anon_vma = anon_vma;
++	get_anon_vma(anon_vma);
++}
++
++
++/**
++ * stable_tree_append() - append a rmap_item to a stable node. Deduplication
++ * ratio statistics are updated in this function.
++ *
++ */
++static void stable_tree_append(struct rmap_item *rmap_item,
++			       struct stable_node *stable_node, int logdedup)
++{
++	struct node_vma *node_vma = NULL, *new_node_vma, *node_vma_cont = NULL;
++	unsigned long key = (unsigned long)rmap_item->slot;
++	unsigned long factor = rmap_item->slot->rung->step;
++
++	BUG_ON(!stable_node);
++	rmap_item->address |= STABLE_FLAG;
++
++	if (hlist_empty(&stable_node->hlist)) {
++		uksm_pages_shared++;
++		goto node_vma_new;
++	} else {
++		uksm_pages_sharing++;
++	}
++
++	hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) {
++		if (node_vma->key >= key)
++			break;
++
++		if (logdedup) {
++			node_vma->slot->pages_bemerged += factor;
++			if (list_empty(&node_vma->slot->dedup_list))
++				list_add(&node_vma->slot->dedup_list,
++					 &vma_slot_dedup);
++		}
++	}
++
++	if (node_vma) {
++		if (node_vma->key == key) {
++			node_vma_cont = hlist_entry_safe(node_vma->hlist.next, struct node_vma, hlist);
++			goto node_vma_ok;
++		} else if (node_vma->key > key) {
++			node_vma_cont = node_vma;
++		}
++	}
++
++node_vma_new:
++	/* no node_vma for the same vma in this node yet, alloc a new node_vma */
++	new_node_vma = alloc_node_vma();
++	BUG_ON(!new_node_vma);
++	new_node_vma->head = stable_node;
++	new_node_vma->slot = rmap_item->slot;
++
++	if (!node_vma) {
++		hlist_add_head(&new_node_vma->hlist, &stable_node->hlist);
++	} else if (node_vma->key != key) {
++		if (node_vma->key < key)
++			hlist_add_behind(&new_node_vma->hlist, &node_vma->hlist);
++		else {
++			hlist_add_before(&new_node_vma->hlist,
++					 &node_vma->hlist);
++		}
++
++	}
++	node_vma = new_node_vma;
++
++node_vma_ok: /* ok, ready to add to the list */
++	rmap_item->head = node_vma;
++	hlist_add_head(&rmap_item->hlist, &node_vma->rmap_hlist);
++	hold_anon_vma(rmap_item, rmap_item->slot->vma->anon_vma);
++	if (logdedup) {
++		rmap_item->slot->pages_merged++;
++		if (node_vma_cont) {
++			node_vma = node_vma_cont;
++			hlist_for_each_entry_continue(node_vma, hlist) {
++				node_vma->slot->pages_bemerged += factor;
++				if (list_empty(&node_vma->slot->dedup_list))
++					list_add(&node_vma->slot->dedup_list,
++						 &vma_slot_dedup);
++			}
++		}
++	}
++}
++
++/*
++ * We use break_ksm to break COW on a ksm page: it's a stripped down
++ *
++ *	if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1)
++ *		put_page(page);
++ *
++ * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
++ * in case the application has unmapped and remapped mm,addr meanwhile.
++ * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
++ * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
++ */
++static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
++{
++	struct page *page;
++	int ret = 0;
++
++	do {
++		cond_resched();
++		page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
++		if (IS_ERR_OR_NULL(page))
++			break;
++		if (PageKsm(page)) {
++			ret = handle_mm_fault(vma, addr,
++					      FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
++		} else
++			ret = VM_FAULT_WRITE;
++		put_page(page);
++	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
++	/*
++	 * We must loop because handle_mm_fault() may back out if there's
++	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
++	 *
++	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
++	 * COW has been broken, even if the vma does not permit VM_WRITE;
++	 * but note that a concurrent fault might break PageKsm for us.
++	 *
++	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
++	 * backing file, which also invalidates anonymous pages: that's
++	 * okay, that truncation will have unmapped the PageKsm for us.
++	 *
++	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
++	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
++	 * current task has TIF_MEMDIE set, and will be OOM killed on return
++	 * to user; and ksmd, having no mm, would never be chosen for that.
++	 *
++	 * But if the mm is in a limited mem_cgroup, then the fault may fail
++	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
++	 * even ksmd can fail in this way - though it's usually breaking ksm
++	 * just to undo a merge it made a moment before, so unlikely to oom.
++	 *
++	 * That's a pity: we might therefore have more kernel pages allocated
++	 * than we're counting as nodes in the stable tree; but uksm_do_scan
++	 * will retry to break_cow on each pass, so should recover the page
++	 * in due course.  The important thing is to not let VM_MERGEABLE
++	 * be cleared while any such pages might remain in the area.
++	 */
++	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
++}
++
++static void break_cow(struct rmap_item *rmap_item)
++{
++	struct vm_area_struct *vma = rmap_item->slot->vma;
++	struct mm_struct *mm = vma->vm_mm;
++	unsigned long addr = get_rmap_addr(rmap_item);
++
++	if (uksm_test_exit(mm))
++		goto out;
++
++	break_ksm(vma, addr);
++out:
++	return;
++}
++
++/*
++ * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather
++ * than check every pte of a given vma, the locking doesn't quite work for
++ * that - an rmap_item is assigned to the stable tree after inserting ksm
++ * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
++ * rmap_items from parent to child at fork time (so as not to waste time
++ * if exit comes before the next scan reaches it).
++ *
++ * Similarly, although we'd like to remove rmap_items (so updating counts
++ * and freeing memory) when unmerging an area, it's easier to leave that
++ * to the next pass of ksmd - consider, for example, how ksmd might be
++ * in cmp_and_merge_page on one of the rmap_items we would be removing.
++ */
++inline int unmerge_uksm_pages(struct vm_area_struct *vma,
++		      unsigned long start, unsigned long end)
++{
++	unsigned long addr;
++	int err = 0;
++
++	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
++		if (uksm_test_exit(vma->vm_mm))
++			break;
++		if (signal_pending(current))
++			err = -ERESTARTSYS;
++		else
++			err = break_ksm(vma, addr);
++	}
++	return err;
++}
++
++static inline void inc_uksm_pages_scanned(void)
++{
++	u64 delta;
++
++
++	if (uksm_pages_scanned == U64_MAX) {
++		encode_benefit();
++
++		delta = uksm_pages_scanned >> pages_scanned_base;
++
++		if (CAN_OVERFLOW_U64(pages_scanned_stored, delta)) {
++			pages_scanned_stored >>= 1;
++			delta >>= 1;
++			pages_scanned_base++;
++		}
++
++		pages_scanned_stored += delta;
++
++		uksm_pages_scanned = uksm_pages_scanned_last = 0;
++	}
++
++	uksm_pages_scanned++;
++}
++
++static inline int find_zero_page_hash(int strength, u32 hash)
++{
++	return (zero_hash_table[strength] == hash);
++}
++
++static
++int cmp_and_merge_zero_page(struct vm_area_struct *vma, struct page *page)
++{
++	struct page *zero_page = empty_uksm_zero_page;
++	struct mm_struct *mm = vma->vm_mm;
++	pte_t orig_pte = __pte(0);
++	int err = -EFAULT;
++
++	if (uksm_test_exit(mm))
++		goto out;
++
++	if (!trylock_page(page))
++		goto out;
++
++	if (!PageAnon(page))
++		goto out_unlock;
++
++	if (PageTransCompound(page)) {
++		err = split_huge_page(page);
++		if (err)
++			goto out_unlock;
++	}
++
++	if (write_protect_page(vma, page, &orig_pte, 0) == 0) {
++		if (is_page_full_zero(page))
++			err = replace_page(vma, page, zero_page, orig_pte);
++	}
++
++out_unlock:
++	unlock_page(page);
++out:
++	return err;
++}
++
++/*
++ * cmp_and_merge_page() - first see if page can be merged into the stable
++ * tree; if not, compare hash to previous and if it's the same, see if page
++ * can be inserted into the unstable tree, or merged with a page already there
++ * and both transferred to the stable tree.
++ *
++ * @rmap_item: the reverse mapping into the virtual address of the page
++ *             that we are searching an identical page for
++ * @hash: the hash value of this page, already calculated
++ */
++static void cmp_and_merge_page(struct rmap_item *rmap_item, u32 hash)
++{
++	struct rmap_item *tree_rmap_item;
++	struct page *page;
++	struct page *kpage = NULL;
++	u32 hash_max;
++	int err;
++	unsigned int success1, success2;
++	struct stable_node *snode;
++	int cmp;
++	struct rb_node *parent = NULL, **new;
++
++	remove_rmap_item_from_tree(rmap_item);
++	page = rmap_item->page;
++
++	/* We start by searching for the page inside the stable tree */
++	kpage = stable_tree_search(rmap_item, hash);
++	if (kpage) {
++		err = try_to_merge_with_uksm_page(rmap_item, kpage,
++						 hash);
++		if (!err) {
++			/*
++			 * The page was successfully merged; add
++			 * its rmap_item to the stable tree.
++			 * The page lock is needed because it
++			 * races with try_to_unmap_ksm(), etc.
++			 */
++			lock_page(kpage);
++			snode = page_stable_node(kpage);
++			stable_tree_append(rmap_item, snode, 1);
++			unlock_page(kpage);
++			put_page(kpage);
++			return; /* success */
++		}
++		put_page(kpage);
++
++		/*
++		 * If it's a collision and it has been searched in the
++		 * sub-rbtree (hash_max != 0), we want to abort, because if it
++		 * were successfully merged in the unstable tree, the collision
++		 * would tend to happen again.
++		 */
++		if (err == MERGE_ERR_COLLI && rmap_item->hash_max)
++			return;
++	}
++
++	tree_rmap_item =
++		unstable_tree_search_insert(rmap_item, hash);
++	if (tree_rmap_item) {
++		err = try_to_merge_two_pages(rmap_item, tree_rmap_item, hash);
++		/*
++		 * As soon as we merge this page, we want to remove the
++		 * rmap_item of the page we have merged with from the unstable
++		 * tree, and insert it instead as new node in the stable tree.
++		 */
++		if (!err) {
++			kpage = page;
++			remove_rmap_item_from_tree(tree_rmap_item);
++			lock_page(kpage);
++			snode = stable_tree_insert(&kpage, hash,
++						   rmap_item, tree_rmap_item,
++						   &success1, &success2);
++
++			/*
++			 * Do not log dedup for tree item, it's not counted as
++			 * scanned in this round.
++			 */
++			if (success2)
++				stable_tree_append(tree_rmap_item, snode, 0);
++
++			/*
++			 * The order of these two stable_tree_append() calls
++			 * is important: we are scanning rmap_item.
++			 */
++			if (success1)
++				stable_tree_append(rmap_item, snode, 1);
++
++			/*
++			 * The original kpage may be unlocked inside
++			 * stable_tree_insert() already. This page
++			 * should be unlocked before doing
++			 * break_cow().
++			 */
++			unlock_page(kpage);
++
++			if (!success1)
++				break_cow(rmap_item);
++
++			if (!success2)
++				break_cow(tree_rmap_item);
++
++		} else if (err == MERGE_ERR_COLLI) {
++			BUG_ON(tree_rmap_item->tree_node->count > 1);
++
++			rmap_item_hash_max(tree_rmap_item,
++					   tree_rmap_item->tree_node->hash);
++
++			hash_max = rmap_item_hash_max(rmap_item, hash);
++			cmp = hash_cmp(hash_max, tree_rmap_item->hash_max);
++			parent = &tree_rmap_item->node;
++			if (cmp < 0)
++				new = &parent->rb_left;
++			else if (cmp > 0)
++				new = &parent->rb_right;
++			else
++				goto put_up_out;
++
++			rmap_item->tree_node = tree_rmap_item->tree_node;
++			rmap_item->address |= UNSTABLE_FLAG;
++			rmap_item->hash_round = uksm_hash_round;
++			rb_link_node(&rmap_item->node, parent, new);
++			rb_insert_color(&rmap_item->node,
++					&tree_rmap_item->tree_node->sub_root);
++			rmap_item->tree_node->count++;
++		} else {
++			/*
++			 * Either one of the pages has changed or they collide
++			 * at the max hash; we consider them ill items.
++			 */
++			remove_rmap_item_from_tree(tree_rmap_item);
++		}
++put_up_out:
++		put_page(tree_rmap_item->page);
++		mmap_read_unlock(tree_rmap_item->slot->vma->vm_mm);
++	}
++}
++
++
++
++
++static inline unsigned long get_pool_index(struct vma_slot *slot,
++					   unsigned long index)
++{
++	unsigned long pool_index;
++
++	pool_index = (sizeof(struct rmap_list_entry *) * index) >> PAGE_SHIFT;
++	if (pool_index >= slot->pool_size)
++		BUG();
++	return pool_index;
++}
++
++static inline unsigned long index_page_offset(unsigned long index)
++{
++	return offset_in_page(sizeof(struct rmap_list_entry *) * index);
++}
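++/*
++ * Worked example for the pool index arithmetic above, assuming 8-byte
++ * pointers and 4 KiB pages (both assumptions, PAGE_SHIFT == 12): entry
++ * index 1000 maps to byte offset 8 * 1000 = 8000, so
++ * get_pool_index() = 8000 >> 12 = 1 and index_page_offset() =
++ * 8000 % 4096 = 3904, i.e. the entry lives 3904 bytes into the second
++ * pool page.
++ */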
++
++static inline
++struct rmap_list_entry *get_rmap_list_entry(struct vma_slot *slot,
++					    unsigned long index, int need_alloc)
++{
++	unsigned long pool_index;
++	struct page *page;
++	void *addr;
++
++
++	pool_index = get_pool_index(slot, index);
++	if (!slot->rmap_list_pool[pool_index]) {
++		if (!need_alloc)
++			return NULL;
++
++		page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
++		if (!page)
++			return NULL;
++
++		slot->rmap_list_pool[pool_index] = page;
++	}
++
++	addr = kmap(slot->rmap_list_pool[pool_index]);
++	addr += index_page_offset(index);
++
++	return addr;
++}
++
++static inline void put_rmap_list_entry(struct vma_slot *slot,
++				       unsigned long index)
++{
++	unsigned long pool_index;
++
++	pool_index = get_pool_index(slot, index);
++	BUG_ON(!slot->rmap_list_pool[pool_index]);
++	kunmap(slot->rmap_list_pool[pool_index]);
++}
++
++static inline int entry_is_new(struct rmap_list_entry *entry)
++{
++	return !entry->item;
++}
++
++static inline unsigned long get_index_orig_addr(struct vma_slot *slot,
++						unsigned long index)
++{
++	return slot->vma->vm_start + (index << PAGE_SHIFT);
++}
++
++static inline unsigned long get_entry_address(struct rmap_list_entry *entry)
++{
++	unsigned long addr;
++
++	if (is_addr(entry->addr))
++		addr = get_clean_addr(entry->addr);
++	else if (entry->item)
++		addr = get_rmap_addr(entry->item);
++	else
++		BUG();
++
++	return addr;
++}
++
++static inline struct rmap_item *get_entry_item(struct rmap_list_entry *entry)
++{
++	if (is_addr(entry->addr))
++		return NULL;
++
++	return entry->item;
++}
++
++static inline void inc_rmap_list_pool_count(struct vma_slot *slot,
++					    unsigned long index)
++{
++	unsigned long pool_index;
++
++	pool_index = get_pool_index(slot, index);
++	BUG_ON(!slot->rmap_list_pool[pool_index]);
++	slot->pool_counts[pool_index]++;
++}
++
++static inline void dec_rmap_list_pool_count(struct vma_slot *slot,
++					    unsigned long index)
++{
++	unsigned long pool_index;
++
++	pool_index = get_pool_index(slot, index);
++	BUG_ON(!slot->rmap_list_pool[pool_index]);
++	BUG_ON(!slot->pool_counts[pool_index]);
++	slot->pool_counts[pool_index]--;
++}
++
++static inline int entry_has_rmap(struct rmap_list_entry *entry)
++{
++	return !is_addr(entry->addr) && entry->item;
++}
++
++static inline void swap_entries(struct rmap_list_entry *entry1,
++				unsigned long index1,
++				struct rmap_list_entry *entry2,
++				unsigned long index2)
++{
++	struct rmap_list_entry tmp;
++
++	/* swapping two new entries is meaningless */
++	BUG_ON(entry_is_new(entry1) && entry_is_new(entry2));
++
++	tmp = *entry1;
++	*entry1 = *entry2;
++	*entry2 = tmp;
++
++	if (entry_has_rmap(entry1))
++		entry1->item->entry_index = index1;
++
++	if (entry_has_rmap(entry2))
++		entry2->item->entry_index = index2;
++
++	if (entry_has_rmap(entry1) && !entry_has_rmap(entry2)) {
++		inc_rmap_list_pool_count(entry1->item->slot, index1);
++		dec_rmap_list_pool_count(entry1->item->slot, index2);
++	} else if (!entry_has_rmap(entry1) && entry_has_rmap(entry2)) {
++		inc_rmap_list_pool_count(entry2->item->slot, index2);
++		dec_rmap_list_pool_count(entry2->item->slot, index1);
++	}
++}
++
++static inline void free_entry_item(struct rmap_list_entry *entry)
++{
++	unsigned long index;
++	struct rmap_item *item;
++
++	if (!is_addr(entry->addr)) {
++		BUG_ON(!entry->item);
++		item = entry->item;
++		entry->addr = get_rmap_addr(item);
++		set_is_addr(entry->addr);
++		index = item->entry_index;
++		remove_rmap_item_from_tree(item);
++		dec_rmap_list_pool_count(item->slot, index);
++		free_rmap_item(item);
++	}
++}
++
++static inline int pool_entry_boundary(unsigned long index)
++{
++	unsigned long linear_addr;
++
++	linear_addr = sizeof(struct rmap_list_entry *) * index;
++	return index && !offset_in_page(linear_addr);
++}
++
++static inline void try_free_last_pool(struct vma_slot *slot,
++				      unsigned long index)
++{
++	unsigned long pool_index;
++
++	pool_index = get_pool_index(slot, index);
++	if (slot->rmap_list_pool[pool_index] &&
++	    !slot->pool_counts[pool_index]) {
++		__free_page(slot->rmap_list_pool[pool_index]);
++		slot->rmap_list_pool[pool_index] = NULL;
++		slot->flags |= UKSM_SLOT_NEED_SORT;
++	}
++
++}
++
++static inline unsigned long vma_item_index(struct vm_area_struct *vma,
++					   struct rmap_item *item)
++{
++	return (get_rmap_addr(item) - vma->vm_start) >> PAGE_SHIFT;
++}
++
++static int within_same_pool(struct vma_slot *slot,
++			    unsigned long i, unsigned long j)
++{
++	unsigned long pool_i, pool_j;
++
++	pool_i = get_pool_index(slot, i);
++	pool_j = get_pool_index(slot, j);
++
++	return (pool_i == pool_j);
++}
++
++static void sort_rmap_entry_list(struct vma_slot *slot)
++{
++	unsigned long i, j;
++	struct rmap_list_entry *entry, *swap_entry;
++
++	entry = get_rmap_list_entry(slot, 0, 0);
++	for (i = 0; i < slot->pages; ) {
++
++		if (!entry)
++			goto skip_whole_pool;
++
++		if (entry_is_new(entry))
++			goto next_entry;
++
++		if (is_addr(entry->addr)) {
++			entry->addr = 0;
++			goto next_entry;
++		}
++
++		j = vma_item_index(slot->vma, entry->item);
++		if (j == i)
++			goto next_entry;
++
++		if (within_same_pool(slot, i, j))
++			swap_entry = entry + j - i;
++		else
++			swap_entry = get_rmap_list_entry(slot, j, 1);
++
++		swap_entries(entry, i, swap_entry, j);
++		if (!within_same_pool(slot, i, j))
++			put_rmap_list_entry(slot, j);
++		continue;
++
++skip_whole_pool:
++		i += PAGE_SIZE / sizeof(*entry);
++		if (i < slot->pages)
++			entry = get_rmap_list_entry(slot, i, 0);
++		continue;
++
++next_entry:
++		if (i >= slot->pages - 1 ||
++		    !within_same_pool(slot, i, i + 1)) {
++			put_rmap_list_entry(slot, i);
++			if (i + 1 < slot->pages)
++				entry = get_rmap_list_entry(slot, i + 1, 0);
++		} else
++			entry++;
++		i++;
++		continue;
++	}
++
++	/* free empty pool entries which contain no rmap_item */
++	/* Can be simplified to rely on pool_counts alone once it is bug-free */
++	for (i = 0; i < slot->pool_size; i++) {
++		unsigned char has_rmap;
++		void *addr;
++
++		if (!slot->rmap_list_pool[i])
++			continue;
++
++		has_rmap = 0;
++		addr = kmap(slot->rmap_list_pool[i]);
++		BUG_ON(!addr);
++		for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) {
++			entry = (struct rmap_list_entry *)addr + j;
++			if (is_addr(entry->addr))
++				continue;
++			if (!entry->item)
++				continue;
++			has_rmap = 1;
++		}
++		kunmap(slot->rmap_list_pool[i]);
++		if (!has_rmap) {
++			BUG_ON(slot->pool_counts[i]);
++			__free_page(slot->rmap_list_pool[i]);
++			slot->rmap_list_pool[i] = NULL;
++		}
++	}
++
++	slot->flags &= ~UKSM_SLOT_NEED_SORT;
++}
++
++/*
++ * vma_fully_scanned() - return whether all pages in this slot have been scanned.
++ */
++static inline int vma_fully_scanned(struct vma_slot *slot)
++{
++	return slot->pages_scanned == slot->pages;
++}
++
++/**
++ * get_next_rmap_item() - Get the next rmap_item in a vma_slot according to
++ * its random permutation. This function also contains the random
++ * permutation index management code.
++ */
++static struct rmap_item *get_next_rmap_item(struct vma_slot *slot, u32 *hash)
++{
++	unsigned long rand_range, addr, swap_index, scan_index;
++	struct rmap_item *item = NULL;
++	struct rmap_list_entry *scan_entry, *swap_entry = NULL;
++	struct page *page;
++
++	scan_index = swap_index = slot->pages_scanned % slot->pages;
++
++	if (pool_entry_boundary(scan_index))
++		try_free_last_pool(slot, scan_index - 1);
++
++	if (vma_fully_scanned(slot)) {
++		if (slot->flags & UKSM_SLOT_NEED_SORT)
++			slot->flags |= UKSM_SLOT_NEED_RERAND;
++		else
++			slot->flags &= ~UKSM_SLOT_NEED_RERAND;
++		if (slot->flags & UKSM_SLOT_NEED_SORT)
++			sort_rmap_entry_list(slot);
++	}
++
++	scan_entry = get_rmap_list_entry(slot, scan_index, 1);
++	if (!scan_entry)
++		return NULL;
++
++	if (entry_is_new(scan_entry)) {
++		scan_entry->addr = get_index_orig_addr(slot, scan_index);
++		set_is_addr(scan_entry->addr);
++	}
++
++	if (slot->flags & UKSM_SLOT_NEED_RERAND) {
++		rand_range = slot->pages - scan_index;
++		BUG_ON(!rand_range);
++		swap_index = scan_index + (prandom_u32() % rand_range);
++	}
++
++	if (swap_index != scan_index) {
++		swap_entry = get_rmap_list_entry(slot, swap_index, 1);
++
++		if (!swap_entry)
++			return NULL;
++
++		if (entry_is_new(swap_entry)) {
++			swap_entry->addr = get_index_orig_addr(slot,
++							       swap_index);
++			set_is_addr(swap_entry->addr);
++		}
++		swap_entries(scan_entry, scan_index, swap_entry, swap_index);
++	}
++
++	addr = get_entry_address(scan_entry);
++	item = get_entry_item(scan_entry);
++	BUG_ON(addr > slot->vma->vm_end || addr < slot->vma->vm_start);
++
++	page = follow_page(slot->vma, addr, FOLL_GET);
++	if (IS_ERR_OR_NULL(page))
++		goto nopage;
++
++	if (!PageAnon(page))
++		goto putpage;
++
++	/* check whether it is the zero_page pfn or the uksm_zero_page */
++	if ((page_to_pfn(page) == zero_pfn)
++			|| (page_to_pfn(page) == uksm_zero_pfn))
++		goto putpage;
++
++	flush_anon_page(slot->vma, page, addr);
++	flush_dcache_page(page);
++
++
++	*hash = page_hash(page, hash_strength, 1);
++	inc_uksm_pages_scanned();
++	/* if the page content is all zero, re-map it to the zero page */
++	if (find_zero_page_hash(hash_strength, *hash)) {
++		if (!cmp_and_merge_zero_page(slot->vma, page)) {
++			slot->pages_merged++;
++
++			/* For full-zero pages, no need to create rmap item */
++			goto putpage;
++		} else {
++			inc_rshash_neg(memcmp_cost / 2);
++		}
++	}
++
++	if (!item) {
++		item = alloc_rmap_item();
++		if (item) {
++			/* It has already been zeroed */
++			item->slot = slot;
++			item->address = addr;
++			item->entry_index = scan_index;
++			scan_entry->item = item;
++			inc_rmap_list_pool_count(slot, scan_index);
++		} else
++			goto putpage;
++	}
++
++	BUG_ON(item->slot != slot);
++	/* the page may have changed */
++	item->page = page;
++	put_rmap_list_entry(slot, scan_index);
++	if (swap_entry)
++		put_rmap_list_entry(slot, swap_index);
++	return item;
++
++putpage:
++	put_page(page);
++	page = NULL;
++nopage:
++	/* no page, store addr back and free rmap_item if possible */
++	free_entry_item(scan_entry);
++	put_rmap_list_entry(slot, scan_index);
++	if (swap_entry)
++		put_rmap_list_entry(slot, swap_index);
++	return NULL;
++}
++
++static inline int in_stable_tree(struct rmap_item *rmap_item)
++{
++	return rmap_item->address & STABLE_FLAG;
++}
++
++/**
++ * scan_vma_one_page() - scan the next page in a vma_slot. Called with
++ * mmap_sem locked.
++ */
++static noinline void scan_vma_one_page(struct vma_slot *slot)
++{
++	u32 hash;
++	struct mm_struct *mm;
++	struct rmap_item *rmap_item = NULL;
++	struct vm_area_struct *vma = slot->vma;
++
++	mm = vma->vm_mm;
++	BUG_ON(!mm);
++	BUG_ON(!slot);
++
++	rmap_item = get_next_rmap_item(slot, &hash);
++	if (!rmap_item)
++		goto out1;
++
++	if (PageKsm(rmap_item->page) && in_stable_tree(rmap_item))
++		goto out2;
++
++	cmp_and_merge_page(rmap_item, hash);
++out2:
++	put_page(rmap_item->page);
++out1:
++	slot->pages_scanned++;
++	slot->this_sampled++;
++	if (slot->fully_scanned_round != fully_scanned_round)
++		scanned_virtual_pages++;
++
++	if (vma_fully_scanned(slot))
++		slot->fully_scanned_round = fully_scanned_round;
++}
++
++static inline unsigned long rung_get_pages(struct scan_rung *rung)
++{
++	struct slot_tree_node *node;
++
++	if (!rung->vma_root.rnode)
++		return 0;
++
++	node = container_of(rung->vma_root.rnode, struct slot_tree_node, snode);
++
++	return node->size;
++}
++
++#define RUNG_SAMPLED_MIN	3
++
++static inline
++void uksm_calc_rung_step(struct scan_rung *rung,
++			 unsigned long page_time, unsigned long ratio)
++{
++	unsigned long sampled, pages;
++
++	/* will be fully scanned ? */
++	if (!rung->cover_msecs) {
++		rung->step = 1;
++		return;
++	}
++
++	sampled = rung->cover_msecs * (NSEC_PER_MSEC / TIME_RATIO_SCALE)
++		  * ratio / page_time;
++
++	/*
++	 *  Before we finish a scan round and its expensive per-round jobs,
++	 *  we need to have a chance to estimate the per-page time. So
++	 *  the sampled number cannot be too small.
++	 */
++	if (sampled < RUNG_SAMPLED_MIN)
++		sampled = RUNG_SAMPLED_MIN;
++
++	pages = rung_get_pages(rung);
++	if (likely(pages > sampled))
++		rung->step = pages / sampled;
++	else
++		rung->step = 1;
++}
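++/*
++ * Worked example for uksm_calc_rung_step(), assuming TIME_RATIO_SCALE ==
++ * 10000 (so NSEC_PER_MSEC / TIME_RATIO_SCALE == 100) and purely
++ * illustrative numbers: with cover_msecs = 200, ratio = 2000 (20% CPU) and
++ * page_time = 2000 ns,
++ *   sampled = 200 * 100 * 2000 / 2000 = 20000 pages per cover period,
++ * and a rung holding 1000000 pages gets step = 1000000 / 20000 = 50, i.e.
++ * roughly every 50th page is sampled. sampled is clamped to at least
++ * RUNG_SAMPLED_MIN so a per-page time estimate can still be formed.
++ */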
++
++static inline int step_need_recalc(struct scan_rung *rung)
++{
++	unsigned long pages, stepmax;
++
++	pages = rung_get_pages(rung);
++	stepmax = pages / RUNG_SAMPLED_MIN;
++
++	return pages && (rung->step > pages ||
++			 (stepmax && rung->step > stepmax));
++}
++
++static inline
++void reset_current_scan(struct scan_rung *rung, int finished, int step_recalc)
++{
++	struct vma_slot *slot;
++
++	if (finished)
++		rung->flags |= UKSM_RUNG_ROUND_FINISHED;
++
++	if (step_recalc || step_need_recalc(rung)) {
++		uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio);
++		BUG_ON(step_need_recalc(rung));
++	}
++
++	slot_iter_index = prandom_u32() % rung->step;
++	BUG_ON(!rung->vma_root.rnode);
++	slot = sradix_tree_next(&rung->vma_root, NULL, 0, slot_iter);
++	BUG_ON(!slot);
++
++	rung->current_scan = slot;
++	rung->current_offset = slot_iter_index;
++}
++
++static inline struct sradix_tree_root *slot_get_root(struct vma_slot *slot)
++{
++	return &slot->rung->vma_root;
++}
++
++/*
++ * Return whether the scan position was reset.
++ */
++static int advance_current_scan(struct scan_rung *rung)
++{
++	unsigned short n;
++	struct vma_slot *slot, *next = NULL;
++
++	BUG_ON(!rung->vma_root.num);
++
++	slot = rung->current_scan;
++	n = (slot->pages - rung->current_offset) % rung->step;
++	slot_iter_index = rung->step - n;
++	next = sradix_tree_next(&rung->vma_root, slot->snode,
++				slot->sindex, slot_iter);
++
++	if (next) {
++		rung->current_offset = slot_iter_index;
++		rung->current_scan = next;
++		return 0;
++	} else {
++		reset_current_scan(rung, 1, 0);
++		return 1;
++	}
++}
++
++static inline void rung_rm_slot(struct vma_slot *slot)
++{
++	struct scan_rung *rung = slot->rung;
++	struct sradix_tree_root *root;
++
++	if (rung->current_scan == slot)
++		advance_current_scan(rung);
++
++	root = slot_get_root(slot);
++	sradix_tree_delete_from_leaf(root, slot->snode, slot->sindex);
++	slot->snode = NULL;
++	if (step_need_recalc(rung)) {
++		uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio);
++		BUG_ON(step_need_recalc(rung));
++	}
++
++	/* In case advance_current_scan() looped back to this slot again */
++	if (rung->vma_root.num && rung->current_scan == slot)
++		reset_current_scan(slot->rung, 1, 0);
++}
++
++static inline void rung_add_new_slots(struct scan_rung *rung,
++			struct vma_slot **slots, unsigned long num)
++{
++	int err;
++	struct vma_slot *slot;
++	unsigned long i;
++	struct sradix_tree_root *root = &rung->vma_root;
++
++	err = sradix_tree_enter(root, (void **)slots, num);
++	BUG_ON(err);
++
++	for (i = 0; i < num; i++) {
++		slot = slots[i];
++		slot->rung = rung;
++		BUG_ON(vma_fully_scanned(slot));
++	}
++
++	if (rung->vma_root.num == num)
++		reset_current_scan(rung, 0, 1);
++}
++
++static inline int rung_add_one_slot(struct scan_rung *rung,
++				     struct vma_slot *slot)
++{
++	int err;
++
++	err = sradix_tree_enter(&rung->vma_root, (void **)&slot, 1);
++	if (err)
++		return err;
++
++	slot->rung = rung;
++	if (rung->vma_root.num == 1)
++		reset_current_scan(rung, 0, 1);
++
++	return 0;
++}
++
++/*
++ * Return true if the slot is deleted from its rung.
++ */
++static inline int vma_rung_enter(struct vma_slot *slot, struct scan_rung *rung)
++{
++	struct scan_rung *old_rung = slot->rung;
++	int err;
++
++	if (old_rung == rung)
++		return 0;
++
++	rung_rm_slot(slot);
++	err = rung_add_one_slot(rung, slot);
++	if (err) {
++		err = rung_add_one_slot(old_rung, slot);
++		WARN_ON(err); /* OOPS, badly OOM, we lost this slot */
++	}
++
++	return 1;
++}
++
++static inline int vma_rung_up(struct vma_slot *slot)
++{
++	struct scan_rung *rung;
++
++	rung = slot->rung;
++	if (slot->rung != &uksm_scan_ladder[SCAN_LADDER_SIZE-1])
++		rung++;
++
++	return vma_rung_enter(slot, rung);
++}
++
++static inline int vma_rung_down(struct vma_slot *slot)
++{
++	struct scan_rung *rung;
++
++	rung = slot->rung;
++	if (slot->rung != &uksm_scan_ladder[0])
++		rung--;
++
++	return vma_rung_enter(slot, rung);
++}
++
++/**
++ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot.
++ */
++static unsigned long cal_dedup_ratio(struct vma_slot *slot)
++{
++	unsigned long ret;
++	unsigned long pages;
++
++	pages = slot->this_sampled;
++	if (!pages)
++		return 0;
++
++	BUG_ON(slot->pages_scanned == slot->last_scanned);
++
++	ret = slot->pages_merged;
++
++	/* Thrashing area filtering */
++	if (ret && uksm_thrash_threshold) {
++		if (slot->pages_cowed * 100 / slot->pages_merged
++		    > uksm_thrash_threshold) {
++			ret = 0;
++		} else {
++			ret = slot->pages_merged - slot->pages_cowed;
++		}
++	}
++
++	return ret * 100 / pages;
++}
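++/*
++ * Worked example for cal_dedup_ratio() with hypothetical counters:
++ * this_sampled = 200, pages_merged = 80, pages_cowed = 10 and
++ * uksm_thrash_threshold = 50 give 10 * 100 / 80 = 12, below the threshold,
++ * so ret = 80 - 10 = 70 and the returned ratio is 70 * 100 / 200 = 35.
++ * Had the COW share exceeded the threshold, the slot would be treated as
++ * thrashing and score 0.
++ */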
++
++/**
++ * cal_dedup_ratio_old() - Calculate the deduplication ratio for this slot.
++ */
++static unsigned long cal_dedup_ratio_old(struct vma_slot *slot)
++{
++	unsigned long ret;
++	unsigned long pages;
++
++	pages = slot->pages;
++	if (!pages)
++		return 0;
++
++	ret = slot->pages_bemerged;
++
++	/* Thrashing area filtering */
++	if (ret && uksm_thrash_threshold) {
++		if (slot->pages_cowed * 100 / slot->pages_bemerged
++		    > uksm_thrash_threshold) {
++			ret = 0;
++		} else {
++			ret = slot->pages_bemerged - slot->pages_cowed;
++		}
++	}
++
++	return ret * 100 / pages;
++}
++
++/**
++ * stable_node_reinsert() - When the hash_strength has been adjusted, the
++ * stable tree needs to be restructured; this is the function that re-inserts
++ * a stable node.
++ */
++static inline void stable_node_reinsert(struct stable_node *new_node,
++					struct page *page,
++					struct rb_root *root_treep,
++					struct list_head *tree_node_listp,
++					u32 hash)
++{
++	struct rb_node **new = &root_treep->rb_node;
++	struct rb_node *parent = NULL;
++	struct stable_node *stable_node;
++	struct tree_node *tree_node;
++	struct page *tree_page;
++	int cmp;
++
++	while (*new) {
++		int cmp;
++
++		tree_node = rb_entry(*new, struct tree_node, node);
++
++		cmp = hash_cmp(hash, tree_node->hash);
++
++		if (cmp < 0) {
++			parent = *new;
++			new = &parent->rb_left;
++		} else if (cmp > 0) {
++			parent = *new;
++			new = &parent->rb_right;
++		} else
++			break;
++	}
++
++	if (*new) {
++		/* find a stable tree node with same first level hash value */
++		stable_node_hash_max(new_node, page, hash);
++		if (tree_node->count == 1) {
++			stable_node = rb_entry(tree_node->sub_root.rb_node,
++					       struct stable_node, node);
++			tree_page = get_uksm_page(stable_node, 1, 0);
++			if (tree_page) {
++				stable_node_hash_max(stable_node,
++						      tree_page, hash);
++				put_page(tree_page);
++
++				/* prepare for stable node insertion */
++
++				cmp = hash_cmp(new_node->hash_max,
++						   stable_node->hash_max);
++				parent = &stable_node->node;
++				if (cmp < 0)
++					new = &parent->rb_left;
++				else if (cmp > 0)
++					new = &parent->rb_right;
++				else
++					goto failed;
++
++				goto add_node;
++			} else {
++				/* The only stable_node was deleted; the tree
++				 * node was not deleted.
++				 */
++				goto tree_node_reuse;
++			}
++		}
++
++		/* well, search the collision subtree */
++		new = &tree_node->sub_root.rb_node;
++		parent = NULL;
++		BUG_ON(!*new);
++		while (*new) {
++			int cmp;
++
++			stable_node = rb_entry(*new, struct stable_node, node);
++
++			cmp = hash_cmp(new_node->hash_max,
++					   stable_node->hash_max);
++
++			if (cmp < 0) {
++				parent = *new;
++				new = &parent->rb_left;
++			} else if (cmp > 0) {
++				parent = *new;
++				new = &parent->rb_right;
++			} else {
++				/* oh, no, still a collision */
++				goto failed;
++			}
++		}
++
++		goto add_node;
++	}
++
++	/* no tree node found */
++	tree_node = alloc_tree_node(tree_node_listp);
++	if (!tree_node) {
++		pr_err("UKSM: memory allocation error!\n");
++		goto failed;
++	} else {
++		tree_node->hash = hash;
++		rb_link_node(&tree_node->node, parent, new);
++		rb_insert_color(&tree_node->node, root_treep);
++
++tree_node_reuse:
++		/* prepare for stable node insertion */
++		parent = NULL;
++		new = &tree_node->sub_root.rb_node;
++	}
++
++add_node:
++	rb_link_node(&new_node->node, parent, new);
++	rb_insert_color(&new_node->node, &tree_node->sub_root);
++	new_node->tree_node = tree_node;
++	tree_node->count++;
++	return;
++
++failed:
++	/* This can only happen when two nodes have collided
++	 * at both levels.
++	 */
++	new_node->tree_node = NULL;
++	return;
++}
++
++static inline void free_all_tree_nodes(struct list_head *list)
++{
++	struct tree_node *node, *tmp;
++
++	list_for_each_entry_safe(node, tmp, list, all_list) {
++		free_tree_node(node);
++	}
++}
++
++/**
++ * stable_tree_delta_hash() - Delta-hash the stable tree from the previous
++ * hash strength to the current hash_strength. It re-structures the whole tree.
++ */
++static inline void stable_tree_delta_hash(u32 prev_hash_strength)
++{
++	struct stable_node *node, *tmp;
++	struct rb_root *root_new_treep;
++	struct list_head *new_tree_node_listp;
++
++	stable_tree_index = (stable_tree_index + 1) % 2;
++	root_new_treep = &root_stable_tree[stable_tree_index];
++	new_tree_node_listp = &stable_tree_node_list[stable_tree_index];
++	*root_new_treep = RB_ROOT;
++	BUG_ON(!list_empty(new_tree_node_listp));
++
++	/*
++	 * we need to be safe, the node could be removed by get_uksm_page()
++	 */
++	list_for_each_entry_safe(node, tmp, &stable_node_list, all_list) {
++		void *addr;
++		struct page *node_page;
++		u32 hash;
++
++		/*
++		 * We are completely re-structuring the stable nodes into a new
++		 * stable tree. We don't bother unlinking from the old tree or
++		 * touching the old tree_nodes; they will be freed all at once.
++		 */
++		node_page = get_uksm_page(node, 0, 0);
++		if (!node_page)
++			continue;
++
++		if (node->tree_node) {
++			hash = node->tree_node->hash;
++
++			addr = kmap_atomic(node_page);
++
++			hash = delta_hash(addr, prev_hash_strength,
++					  hash_strength, hash);
++			kunmap_atomic(addr);
++		} else {
++			/*
++			 * It was not inserted into the rbtree due to a
++			 * collision in the last scan round.
++			 */
++			hash = page_hash(node_page, hash_strength, 0);
++		}
++
++		stable_node_reinsert(node, node_page, root_new_treep,
++				     new_tree_node_listp, hash);
++		put_page(node_page);
++	}
++
++	root_stable_treep = root_new_treep;
++	free_all_tree_nodes(stable_tree_node_listp);
++	BUG_ON(!list_empty(stable_tree_node_listp));
++	stable_tree_node_listp = new_tree_node_listp;
++}
++
++static inline void inc_hash_strength(unsigned long delta)
++{
++	hash_strength += 1 << delta;
++	if (hash_strength > HASH_STRENGTH_MAX)
++		hash_strength = HASH_STRENGTH_MAX;
++}
++
++static inline void dec_hash_strength(unsigned long delta)
++{
++	unsigned long change = 1 << delta;
++
++	if (hash_strength <= change + 1)
++		hash_strength = 1;
++	else
++		hash_strength -= change;
++}
++
++static inline void inc_hash_strength_delta(void)
++{
++	hash_strength_delta++;
++	if (hash_strength_delta > HASH_STRENGTH_DELTA_MAX)
++		hash_strength_delta = HASH_STRENGTH_DELTA_MAX;
++}
++
++static inline unsigned long get_current_neg_ratio(void)
++{
++	u64 pos = benefit.pos;
++	u64 neg = benefit.neg;
++
++	if (!neg)
++		return 0;
++
++	if (!pos || neg > pos)
++		return 100;
++
++	if (neg > div64_u64(U64_MAX, 100))
++		pos = div64_u64(pos, 100);
++	else
++		neg *= 100;
++
++	return div64_u64(neg, pos);
++}
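++/*
++ * Example with hypothetical benefit counters: benefit.pos = 1000 and
++ * benefit.neg = 250 yield a negative ratio of 250 * 100 / 1000 = 25.
++ * neg >= pos is clamped to 100, and the div64_u64() branch above only
++ * exists to avoid overflowing neg * 100 when neg is huge.
++ */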
++
++static inline unsigned long get_current_benefit(void)
++{
++	u64 pos = benefit.pos;
++	u64 neg = benefit.neg;
++	u64 scanned = benefit.scanned;
++
++	if (neg > pos)
++		return 0;
++
++	return div64_u64((pos - neg), scanned);
++}
++
++static inline int judge_rshash_direction(void)
++{
++	u64 current_neg_ratio, stable_benefit;
++	u64 current_benefit, delta = 0;
++	int ret = STILL;
++
++	/*
++	 * Try to probe a value after boot, and in case the system
++	 * stays still (idle) for a long time.
++	 */
++	if ((fully_scanned_round & 0xFFULL) == 10) {
++		ret = OBSCURE;
++		goto out;
++	}
++
++	current_neg_ratio = get_current_neg_ratio();
++
++	if (current_neg_ratio == 0) {
++		rshash_neg_cont_zero++;
++		if (rshash_neg_cont_zero > 2)
++			return GO_DOWN;
++		else
++			return STILL;
++	}
++	rshash_neg_cont_zero = 0;
++
++	if (current_neg_ratio > 90) {
++		ret = GO_UP;
++		goto out;
++	}
++
++	current_benefit = get_current_benefit();
++	stable_benefit = rshash_state.stable_benefit;
++
++	if (!stable_benefit) {
++		ret = OBSCURE;
++		goto out;
++	}
++
++	if (current_benefit > stable_benefit)
++		delta = current_benefit - stable_benefit;
++	else if (current_benefit < stable_benefit)
++		delta = stable_benefit - current_benefit;
++
++	delta = div64_u64(100 * delta, stable_benefit);
++
++	if (delta > 50) {
++		rshash_cont_obscure++;
++		if (rshash_cont_obscure > 2)
++			return OBSCURE;
++		else
++			return STILL;
++	}
++
++out:
++	rshash_cont_obscure = 0;
++	return ret;
++}
++
++/**
++ * rshash_adjust() - The main function controlling the random sampling state
++ * machine that adapts the hash strength.
++ *
++ * Returns true if hash_strength has changed.
++ */
++static inline int rshash_adjust(void)
++{
++	unsigned long prev_hash_strength = hash_strength;
++
++	if (!encode_benefit())
++		return 0;
++
++	switch (rshash_state.state) {
++	case RSHASH_STILL:
++		switch (judge_rshash_direction()) {
++		case GO_UP:
++			if (rshash_state.pre_direct == GO_DOWN)
++				hash_strength_delta = 0;
++
++			inc_hash_strength(hash_strength_delta);
++			inc_hash_strength_delta();
++			rshash_state.stable_benefit = get_current_benefit();
++			rshash_state.pre_direct = GO_UP;
++			break;
++
++		case GO_DOWN:
++			if (rshash_state.pre_direct == GO_UP)
++				hash_strength_delta = 0;
++
++			dec_hash_strength(hash_strength_delta);
++			inc_hash_strength_delta();
++			rshash_state.stable_benefit = get_current_benefit();
++			rshash_state.pre_direct = GO_DOWN;
++			break;
++
++		case OBSCURE:
++			rshash_state.stable_point = hash_strength;
++			rshash_state.turn_point_down = hash_strength;
++			rshash_state.turn_point_up = hash_strength;
++			rshash_state.turn_benefit_down = get_current_benefit();
++			rshash_state.turn_benefit_up = get_current_benefit();
++			rshash_state.lookup_window_index = 0;
++			rshash_state.state = RSHASH_TRYDOWN;
++			dec_hash_strength(hash_strength_delta);
++			inc_hash_strength_delta();
++			break;
++
++		case STILL:
++			break;
++		default:
++			BUG();
++		}
++		break;
++
++	case RSHASH_TRYDOWN:
++		if (rshash_state.lookup_window_index++ % 5 == 0)
++			rshash_state.below_count = 0;
++
++		if (get_current_benefit() < rshash_state.stable_benefit)
++			rshash_state.below_count++;
++		else if (get_current_benefit() >
++			 rshash_state.turn_benefit_down) {
++			rshash_state.turn_point_down = hash_strength;
++			rshash_state.turn_benefit_down = get_current_benefit();
++		}
++
++		if (rshash_state.below_count >= 3 ||
++		    judge_rshash_direction() == GO_UP ||
++		    hash_strength == 1) {
++			hash_strength = rshash_state.stable_point;
++			hash_strength_delta = 0;
++			inc_hash_strength(hash_strength_delta);
++			inc_hash_strength_delta();
++			rshash_state.lookup_window_index = 0;
++			rshash_state.state = RSHASH_TRYUP;
++			hash_strength_delta = 0;
++		} else {
++			dec_hash_strength(hash_strength_delta);
++			inc_hash_strength_delta();
++		}
++		break;
++
++	case RSHASH_TRYUP:
++		if (rshash_state.lookup_window_index++ % 5 == 0)
++			rshash_state.below_count = 0;
++
++		if (get_current_benefit() < rshash_state.turn_benefit_down)
++			rshash_state.below_count++;
++		else if (get_current_benefit() > rshash_state.turn_benefit_up) {
++			rshash_state.turn_point_up = hash_strength;
++			rshash_state.turn_benefit_up = get_current_benefit();
++		}
++
++		if (rshash_state.below_count >= 3 ||
++		    judge_rshash_direction() == GO_DOWN ||
++		    hash_strength == HASH_STRENGTH_MAX) {
++			hash_strength = rshash_state.turn_benefit_up >
++				rshash_state.turn_benefit_down ?
++				rshash_state.turn_point_up :
++				rshash_state.turn_point_down;
++
++			rshash_state.state = RSHASH_PRE_STILL;
++		} else {
++			inc_hash_strength(hash_strength_delta);
++			inc_hash_strength_delta();
++		}
++
++		break;
++
++	case RSHASH_NEW:
++	case RSHASH_PRE_STILL:
++		rshash_state.stable_benefit = get_current_benefit();
++		rshash_state.state = RSHASH_STILL;
++		hash_strength_delta = 0;
++		break;
++	default:
++		BUG();
++	}
++
++	/* rshash_neg = rshash_pos = 0; */
++	reset_benefit();
++
++	if (prev_hash_strength != hash_strength)
++		stable_tree_delta_hash(prev_hash_strength);
++
++	return prev_hash_strength != hash_strength;
++}
++
++/**
++ * round_update_ladder() - The main function that updates all the
++ * adjustments whenever a scan round is finished.
++ */
++static noinline void round_update_ladder(void)
++{
++	int i;
++	unsigned long dedup;
++	struct vma_slot *slot, *tmp_slot;
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++)
++		uksm_scan_ladder[i].flags &= ~UKSM_RUNG_ROUND_FINISHED;
++
++	list_for_each_entry_safe(slot, tmp_slot, &vma_slot_dedup, dedup_list) {
++
++		/* the slot may have been removed by rung_rm_slot() when its mm exited */
++		if (slot->snode) {
++			dedup = cal_dedup_ratio_old(slot);
++			if (dedup && dedup >= uksm_abundant_threshold)
++				vma_rung_up(slot);
++		}
++
++		slot->pages_bemerged = 0;
++		slot->pages_cowed = 0;
++
++		list_del_init(&slot->dedup_list);
++	}
++}
++
++static void uksm_del_vma_slot(struct vma_slot *slot)
++{
++	int i, j;
++	struct rmap_list_entry *entry;
++
++	if (slot->snode) {
++		/*
++		 * In case it failed right when entering the rung, removal is
++		 * not necessary.
++		 */
++		rung_rm_slot(slot);
++	}
++
++	if (!list_empty(&slot->dedup_list))
++		list_del(&slot->dedup_list);
++
++	if (!slot->rmap_list_pool || !slot->pool_counts) {
++		/* In case it OOMed in uksm_vma_enter() */
++		goto out;
++	}
++
++	for (i = 0; i < slot->pool_size; i++) {
++		void *addr;
++
++		if (!slot->rmap_list_pool[i])
++			continue;
++
++		addr = kmap(slot->rmap_list_pool[i]);
++		for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) {
++			entry = (struct rmap_list_entry *)addr + j;
++			if (is_addr(entry->addr))
++				continue;
++			if (!entry->item)
++				continue;
++
++			remove_rmap_item_from_tree(entry->item);
++			free_rmap_item(entry->item);
++			slot->pool_counts[i]--;
++		}
++		BUG_ON(slot->pool_counts[i]);
++		kunmap(slot->rmap_list_pool[i]);
++		__free_page(slot->rmap_list_pool[i]);
++	}
++	kfree(slot->rmap_list_pool);
++	kfree(slot->pool_counts);
++
++out:
++	slot->rung = NULL;
++	if (slot->flags & UKSM_SLOT_IN_UKSM) {
++		BUG_ON(uksm_pages_total < slot->pages);
++		uksm_pages_total -= slot->pages;
++	}
++
++	if (slot->fully_scanned_round == fully_scanned_round)
++		scanned_virtual_pages -= slot->pages;
++	else
++		scanned_virtual_pages -= slot->pages_scanned;
++	free_vma_slot(slot);
++}
++
++
++#define SPIN_LOCK_PERIOD	32
++static struct vma_slot *cleanup_slots[SPIN_LOCK_PERIOD];
++static inline void cleanup_vma_slots(void)
++{
++	struct vma_slot *slot;
++	int i;
++
++	i = 0;
++	spin_lock(&vma_slot_list_lock);
++	while (!list_empty(&vma_slot_del)) {
++		slot = list_entry(vma_slot_del.next,
++				  struct vma_slot, slot_list);
++		list_del(&slot->slot_list);
++		cleanup_slots[i++] = slot;
++		if (i == SPIN_LOCK_PERIOD) {
++			spin_unlock(&vma_slot_list_lock);
++			while (--i >= 0)
++				uksm_del_vma_slot(cleanup_slots[i]);
++			i = 0;
++			spin_lock(&vma_slot_list_lock);
++		}
++	}
++	spin_unlock(&vma_slot_list_lock);
++
++	while (--i >= 0)
++		uksm_del_vma_slot(cleanup_slots[i]);
++}
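++
++/*
++ * Note on the batching above: the deletion list is drained in batches of
++ * SPIN_LOCK_PERIOD (32) slots, and vma_slot_list_lock is dropped before
++ * uksm_del_vma_slot() runs, since that function kmaps the rmap pool pages
++ * and frees memory and is therefore kept outside the spinlock.
++ */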
++
++/*
++ * Exponential moving average formula
++ */
++static inline unsigned long ema(unsigned long curr, unsigned long last_ema)
++{
++	/*
++	 * For a very high burst even the EMA cannot work well: a falsely high
++	 * per-page time estimate feeds back as very high context-switch and
++	 * rung-update overhead, which in turn raises the per-page time again,
++	 * so the estimate may never converge.
++	 *
++	 * Instead, we try to approach this value in a binary manner.
++	 */
++	if (curr > last_ema * 10)
++		return last_ema * 2;
++
++	return (EMA_ALPHA * curr + (100 - EMA_ALPHA) * last_ema) / 100;
++}
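++
++/*
++ * Worked example (illustrative values only; assume EMA_ALPHA is 20):
++ * curr = 300, last_ema = 100 gives (20 * 300 + 80 * 100) / 100 = 140.
++ * A burst with curr > 10 * last_ema skips the formula and only doubles
++ * last_ema, so a single spike cannot blow up the estimate.
++ */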
++
++/*
++ * Convert a cpu ratio, configured by the user in units of
++ * 1/TIME_RATIO_SCALE, to nanoseconds based on the current
++ * uksm_sleep_jiffies.
++ */
++static inline unsigned long cpu_ratio_to_nsec(unsigned int ratio)
++{
++	return NSEC_PER_USEC * jiffies_to_usecs(uksm_sleep_jiffies) /
++		(TIME_RATIO_SCALE - ratio) * ratio;
++}
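++
++/*
++ * Worked example (illustrative; assume TIME_RATIO_SCALE is 1000 and a
++ * sleep period of 20 ms = 20000 us): ratio = 200 gives
++ * 1000 * 20000 / (1000 - 200) * 200 = 5,000,000 ns, i.e. 5 ms of scan
++ * budget for every 20 ms of sleep, which is the requested 20% of CPU.
++ */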
++
++
++static inline unsigned long rung_real_ratio(int cpu_time_ratio)
++{
++	unsigned long ret;
++
++	BUG_ON(!cpu_time_ratio);
++
++	if (cpu_time_ratio > 0)
++		ret = cpu_time_ratio;
++	else
++		ret = (unsigned long)(-cpu_time_ratio) *
++			uksm_max_cpu_percentage / 100UL;
++
++	return ret ? ret : 1;
++}
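++
++/*
++ * A positive cpu_ratio is used as-is; a negative one means a fraction of
++ * uksm_max_cpu_percentage (displayed as "MAX/n" in sysfs). Illustrative
++ * example, assuming TIME_RATIO_SCALE is 1000: cpu_ratio = -250 with
++ * uksm_max_cpu_percentage = 20 gives 250 * 20 / 100 = 50, i.e. 5% of the
++ * scale, which is MAX (20%) divided by 4.
++ */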
++
++static noinline void uksm_calc_scan_pages(void)
++{
++	struct scan_rung *ladder = uksm_scan_ladder;
++	unsigned long sleep_usecs, nsecs;
++	unsigned long ratio;
++	int i;
++	unsigned long per_page;
++
++	if (uksm_ema_page_time > 100000 ||
++	    (((unsigned long) uksm_eval_round & (256UL - 1)) == 0UL))
++		uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
++
++	per_page = uksm_ema_page_time;
++	BUG_ON(!per_page);
++
++	/*
++	 * Every 8 eval rounds, re-probe a uksm_sleep_jiffies value based on
++	 * the saved user input.
++	 */
++	if (((unsigned long) uksm_eval_round & (8UL - 1)) == 0UL)
++		uksm_sleep_jiffies = uksm_sleep_saved;
++
++	/* We require a rung to scan at least 1 page in a period. */
++	nsecs = per_page;
++	ratio = rung_real_ratio(ladder[0].cpu_ratio);
++	if (cpu_ratio_to_nsec(ratio) < nsecs) {
++		sleep_usecs = nsecs * (TIME_RATIO_SCALE - ratio) / ratio
++				/ NSEC_PER_USEC;
++		uksm_sleep_jiffies = usecs_to_jiffies(sleep_usecs) + 1;
++	}
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		ratio = rung_real_ratio(ladder[i].cpu_ratio);
++		ladder[i].pages_to_scan = cpu_ratio_to_nsec(ratio) /
++					per_page;
++		BUG_ON(!ladder[i].pages_to_scan);
++		uksm_calc_rung_step(&ladder[i], per_page, ratio);
++	}
++}
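++
++/*
++ * Illustrative sizing (made-up numbers, only the division comes from the
++ * code above): with uksm_ema_page_time around 500 ns and a rung whose
++ * cpu_ratio_to_nsec() budget is 5,000,000 ns per period, that rung gets
++ * pages_to_scan = 5,000,000 / 500 = 10,000 pages per period.
++ */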
++
++/*
++ * Convert the scan time of this round (ns) into the next expected minimum
++ * sleep time (ms), being careful of possible overflows. ratio is taken
++ * from rung_real_ratio().
++ */
++static inline
++unsigned int scan_time_to_sleep(unsigned long long scan_time, unsigned long ratio)
++{
++	scan_time >>= 20; /* to msec level now */
++	BUG_ON(scan_time > (ULONG_MAX / TIME_RATIO_SCALE));
++
++	return (unsigned int) ((unsigned long) scan_time *
++			       (TIME_RATIO_SCALE - ratio) / ratio);
++}
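++
++/*
++ * Worked example (illustrative; assume TIME_RATIO_SCALE is 1000):
++ * scan_time = 8,388,608 ns >> 20 is roughly 8 ms spent scanning; with
++ * ratio = 200 the expected sleep is 8 * (1000 - 200) / 200 = 32 ms,
++ * which again keeps the scanner close to 20% CPU.
++ */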
++
++#define __round_mask(x, y) ((__typeof__(x))((y)-1))
++#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
++
++static void uksm_vma_enter(struct vma_slot **slots, unsigned long num)
++{
++	struct scan_rung *rung;
++
++	rung = &uksm_scan_ladder[0];
++	rung_add_new_slots(rung, slots, num);
++}
++
++static struct vma_slot *batch_slots[SLOT_TREE_NODE_STORE_SIZE];
++
++static void uksm_enter_all_slots(void)
++{
++	struct vma_slot *slot;
++	unsigned long index;
++	struct list_head empty_vma_list;
++	int i;
++
++	i = 0;
++	index = 0;
++	INIT_LIST_HEAD(&empty_vma_list);
++
++	spin_lock(&vma_slot_list_lock);
++	while (!list_empty(&vma_slot_new)) {
++		slot = list_entry(vma_slot_new.next,
++				  struct vma_slot, slot_list);
++
++		if (!slot->vma->anon_vma) {
++			list_move(&slot->slot_list, &empty_vma_list);
++		} else if (vma_can_enter(slot->vma)) {
++			batch_slots[index++] = slot;
++			list_del_init(&slot->slot_list);
++		} else {
++			list_move(&slot->slot_list, &vma_slot_noadd);
++		}
++
++		if (++i == SPIN_LOCK_PERIOD ||
++		    (index && !(index % SLOT_TREE_NODE_STORE_SIZE))) {
++			spin_unlock(&vma_slot_list_lock);
++
++			if (index && !(index % SLOT_TREE_NODE_STORE_SIZE)) {
++				uksm_vma_enter(batch_slots, index);
++				index = 0;
++			}
++			i = 0;
++			cond_resched();
++			spin_lock(&vma_slot_list_lock);
++		}
++	}
++
++	list_splice(&empty_vma_list, &vma_slot_new);
++
++	spin_unlock(&vma_slot_list_lock);
++
++	if (index)
++		uksm_vma_enter(batch_slots, index);
++
++}
++
++static inline int rung_round_finished(struct scan_rung *rung)
++{
++	return rung->flags & UKSM_RUNG_ROUND_FINISHED;
++}
++
++static inline void judge_slot(struct vma_slot *slot)
++{
++	struct scan_rung *rung = slot->rung;
++	unsigned long dedup;
++	int deleted;
++
++	dedup = cal_dedup_ratio(slot);
++	if (vma_fully_scanned(slot) && uksm_thrash_threshold)
++		deleted = vma_rung_enter(slot, &uksm_scan_ladder[0]);
++	else if (dedup && dedup >= uksm_abundant_threshold)
++		deleted = vma_rung_up(slot);
++	else
++		deleted = vma_rung_down(slot);
++
++	slot->pages_merged = 0;
++	slot->pages_cowed = 0;
++	slot->this_sampled = 0;
++
++	if (vma_fully_scanned(slot))
++		slot->pages_scanned = 0;
++
++	slot->last_scanned = slot->pages_scanned;
++
++	/* If it was deleted above, the rung was already advanced. */
++	if (!deleted)
++		advance_current_scan(rung);
++}
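++
++/*
++ * Recap of the policy above: a fully scanned slot is moved back to the
++ * first rung when uksm_thrash_threshold is non-zero, a slot whose dedup
++ * ratio reaches uksm_abundant_threshold is promoted via vma_rung_up(),
++ * and every other slot is demoted via vma_rung_down().
++ */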
++
++
++static inline int hash_round_finished(void)
++{
++	if (scanned_virtual_pages > (uksm_pages_total >> 2)) {
++		scanned_virtual_pages = 0;
++		if (uksm_pages_scanned)
++			fully_scanned_round++;
++
++		return 1;
++	} else {
++		return 0;
++	}
++}
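++
++/*
++ * In other words, a hash round is considered finished once the virtual
++ * pages scanned exceed a quarter of uksm_pages_total (the >> 2 above);
++ * the counter is then reset and fully_scanned_round is bumped if any
++ * pages were actually scanned.
++ */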
++
++#define UKSM_MMSEM_BATCH	5
++#define BUSY_RETRY		100
++
++/**
++ * uksm_do_scan() - the main worker function.
++ */
++static noinline void uksm_do_scan(void)
++{
++	struct vma_slot *slot, *iter;
++	struct mm_struct *busy_mm;
++	unsigned char round_finished, all_rungs_empty;
++	int i, err, mmsem_batch;
++	unsigned long pcost;
++	long long delta_exec;
++	unsigned long vpages, max_cpu_ratio;
++	unsigned long long start_time, end_time, scan_time;
++	unsigned int expected_jiffies;
++
++	might_sleep();
++
++	vpages = 0;
++
++	start_time = task_sched_runtime(current);
++	max_cpu_ratio = 0;
++	mmsem_batch = 0;
++
++	for (i = 0; i < SCAN_LADDER_SIZE;) {
++		struct scan_rung *rung = &uksm_scan_ladder[i];
++		unsigned long ratio;
++		int busy_retry;
++
++		if (!rung->pages_to_scan) {
++			i++;
++			continue;
++		}
++
++		if (!rung->vma_root.num) {
++			rung->pages_to_scan = 0;
++			i++;
++			continue;
++		}
++
++		ratio = rung_real_ratio(rung->cpu_ratio);
++		if (ratio > max_cpu_ratio)
++			max_cpu_ratio = ratio;
++
++		busy_retry = BUSY_RETRY;
++		/*
++		 * Do not consider rung_round_finished() here, just use up the
++		 * rung->pages_to_scan quota.
++		 */
++		while (rung->pages_to_scan && rung->vma_root.num &&
++		       likely(!freezing(current))) {
++			int reset = 0;
++
++			slot = rung->current_scan;
++
++			BUG_ON(vma_fully_scanned(slot));
++
++			if (mmsem_batch)
++				err = 0;
++			else
++				err = try_down_read_slot_mmap_sem(slot);
++
++			if (err == -ENOENT) {
++rm_slot:
++				rung_rm_slot(slot);
++				continue;
++			}
++
++			busy_mm = slot->mm;
++
++			if (err == -EBUSY) {
++				/* skip other vmas on the same mm */
++				do {
++					reset = advance_current_scan(rung);
++					iter = rung->current_scan;
++					busy_retry--;
++					if (iter->vma->vm_mm != busy_mm ||
++					    !busy_retry || reset)
++						break;
++				} while (1);
++
++				if (iter->vma->vm_mm != busy_mm) {
++					continue;
++				} else {
++					/* scan round finished */
++					break;
++				}
++			}
++
++			BUG_ON(!vma_can_enter(slot->vma));
++			if (uksm_test_exit(slot->vma->vm_mm)) {
++				mmsem_batch = 0;
++				mmap_read_unlock(slot->vma->vm_mm);
++				goto rm_slot;
++			}
++
++			if (mmsem_batch)
++				mmsem_batch--;
++			else
++				mmsem_batch = UKSM_MMSEM_BATCH;
++
++			/* OK, we have taken the mmap_sem, ready to scan */
++			scan_vma_one_page(slot);
++			rung->pages_to_scan--;
++			vpages++;
++
++			if (rung->current_offset + rung->step > slot->pages - 1
++			    || vma_fully_scanned(slot)) {
++				mmap_read_unlock(slot->vma->vm_mm);
++				judge_slot(slot);
++				mmsem_batch = 0;
++			} else {
++				rung->current_offset += rung->step;
++				if (!mmsem_batch)
++					mmap_read_unlock(slot->vma->vm_mm);
++			}
++
++			busy_retry = BUSY_RETRY;
++			cond_resched();
++		}
++
++		if (mmsem_batch) {
++			mmap_read_unlock(slot->vma->vm_mm);
++			mmsem_batch = 0;
++		}
++
++		if (freezing(current))
++			break;
++
++		cond_resched();
++	}
++	end_time = task_sched_runtime(current);
++	delta_exec = end_time - start_time;
++
++	if (freezing(current))
++		return;
++
++	cleanup_vma_slots();
++	uksm_enter_all_slots();
++
++	round_finished = 1;
++	all_rungs_empty = 1;
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		struct scan_rung *rung = &uksm_scan_ladder[i];
++
++		if (rung->vma_root.num) {
++			all_rungs_empty = 0;
++			if (!rung_round_finished(rung))
++				round_finished = 0;
++		}
++	}
++
++	if (all_rungs_empty)
++		round_finished = 0;
++
++	if (round_finished) {
++		round_update_ladder();
++		uksm_eval_round++;
++
++		if (hash_round_finished() && rshash_adjust()) {
++			/* Reset the unstable root iff hash strength changed */
++			uksm_hash_round++;
++			root_unstable_tree = RB_ROOT;
++			free_all_tree_nodes(&unstable_tree_node_list);
++		}
++
++		/*
++		 * A number of pages can hang around indefinitely on per-cpu
++		 * pagevecs, raised page count preventing write_protect_page
++		 * from merging them.  Though it doesn't really matter much,
++		 * it is puzzling to see some stuck in pages_volatile until
++		 * other activity jostles them out, and they also prevented
++		 * LTP's KSM test from succeeding deterministically; so drain
++		 * them here (here rather than on entry to uksm_do_scan(),
++		 * so we don't IPI too often when pages_to_scan is set low).
++		 */
++		lru_add_drain_all();
++	}
++
++
++	if (vpages && delta_exec > 0) {
++		pcost = (unsigned long) delta_exec / vpages;
++		if (likely(uksm_ema_page_time))
++			uksm_ema_page_time = ema(pcost, uksm_ema_page_time);
++		else
++			uksm_ema_page_time = pcost;
++	}
++
++	uksm_calc_scan_pages();
++	uksm_sleep_real = uksm_sleep_jiffies;
++	/* in case of radical cpu bursts, apply the upper bound */
++	end_time = task_sched_runtime(current);
++	if (max_cpu_ratio && end_time > start_time) {
++		scan_time = end_time - start_time;
++		expected_jiffies = msecs_to_jiffies(
++			scan_time_to_sleep(scan_time, max_cpu_ratio));
++
++		if (expected_jiffies > uksm_sleep_real)
++			uksm_sleep_real = expected_jiffies;
++
++		/* We have a 1 second upper bound for responsiveness. */
++		if (jiffies_to_msecs(uksm_sleep_real) > MSEC_PER_SEC)
++			uksm_sleep_real = msecs_to_jiffies(1000);
++	}
++
++	return;
++}
++
++static int ksmd_should_run(void)
++{
++	return uksm_run & UKSM_RUN_MERGE;
++}
++
++static int uksm_scan_thread(void *nothing)
++{
++	set_freezable();
++	set_user_nice(current, 5);
++
++	while (!kthread_should_stop()) {
++		mutex_lock(&uksm_thread_mutex);
++		if (ksmd_should_run())
++			uksm_do_scan();
++		mutex_unlock(&uksm_thread_mutex);
++
++		try_to_freeze();
++
++		if (ksmd_should_run()) {
++			schedule_timeout_interruptible(uksm_sleep_real);
++			uksm_sleep_times++;
++		} else {
++			wait_event_freezable(uksm_thread_wait,
++				ksmd_should_run() || kthread_should_stop());
++		}
++	}
++	return 0;
++}
++
++void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
++{
++	struct stable_node *stable_node;
++	struct node_vma *node_vma;
++	struct rmap_item *rmap_item;
++	int search_new_forks = 0;
++	unsigned long address;
++
++	VM_BUG_ON_PAGE(!PageKsm(page), page);
++	VM_BUG_ON_PAGE(!PageLocked(page), page);
++
++	stable_node = page_stable_node(page);
++	if (!stable_node)
++		return;
++again:
++	hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) {
++		hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) {
++			struct anon_vma *anon_vma = rmap_item->anon_vma;
++			struct anon_vma_chain *vmac;
++			struct vm_area_struct *vma;
++
++			cond_resched();
++			anon_vma_lock_read(anon_vma);
++			anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
++						       0, ULONG_MAX) {
++				cond_resched();
++				vma = vmac->vma;
++				address = get_rmap_addr(rmap_item);
++
++				if (address < vma->vm_start ||
++				    address >= vma->vm_end)
++					continue;
++
++				if ((rmap_item->slot->vma == vma) ==
++				    search_new_forks)
++					continue;
++
++				if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
++					continue;
++
++				if (!rwc->rmap_one(page, vma, address, rwc->arg)) {
++					anon_vma_unlock_read(anon_vma);
++					return;
++				}
++
++				if (rwc->done && rwc->done(page)) {
++					anon_vma_unlock_read(anon_vma);
++					return;
++				}
++			}
++			anon_vma_unlock_read(anon_vma);
++		}
++	}
++	if (!search_new_forks++)
++		goto again;
++}
++
++#ifdef CONFIG_MIGRATION
++/* Common ksm interface, but the implementation may be specific to uksm */
++void ksm_migrate_page(struct page *newpage, struct page *oldpage)
++{
++	struct stable_node *stable_node;
++
++	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
++	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
++	VM_BUG_ON(newpage->mapping != oldpage->mapping);
++
++	stable_node = page_stable_node(newpage);
++	if (stable_node) {
++		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
++		stable_node->kpfn = page_to_pfn(newpage);
++		/*
++		 * newpage->mapping was set in advance; now we need smp_wmb()
++		 * to make sure that the new stable_node->kpfn is visible
++		 * to get_ksm_page() before it can see that oldpage->mapping
++		 * has gone stale (or that PageSwapCache has been cleared).
++		 */
++		smp_wmb();
++		set_page_stable_node(oldpage, NULL);
++	}
++}
++#endif /* CONFIG_MIGRATION */
++
++#ifdef CONFIG_MEMORY_HOTREMOVE
++static struct stable_node *uksm_check_stable_tree(unsigned long start_pfn,
++						 unsigned long end_pfn)
++{
++	struct rb_node *node;
++
++	for (node = rb_first(root_stable_treep); node; node = rb_next(node)) {
++		struct stable_node *stable_node;
++
++		stable_node = rb_entry(node, struct stable_node, node);
++		if (stable_node->kpfn >= start_pfn &&
++		    stable_node->kpfn < end_pfn)
++			return stable_node;
++	}
++	return NULL;
++}
++
++static int uksm_memory_callback(struct notifier_block *self,
++			       unsigned long action, void *arg)
++{
++	struct memory_notify *mn = arg;
++	struct stable_node *stable_node;
++
++	switch (action) {
++	case MEM_GOING_OFFLINE:
++		/*
++		 * Keep it very simple for now: just lock out ksmd and
++		 * MADV_UNMERGEABLE while any memory is going offline.
++		 * mutex_lock_nested() is necessary because lockdep was alarmed
++		 * that here we take uksm_thread_mutex inside notifier chain
++		 * mutex, and later take notifier chain mutex inside
++		 * uksm_thread_mutex to unlock it. But that's safe because both
++		 * are inside mem_hotplug_mutex.
++		 */
++		mutex_lock_nested(&uksm_thread_mutex, SINGLE_DEPTH_NESTING);
++		break;
++
++	case MEM_OFFLINE:
++		/*
++		 * Most of the work is done by page migration; but there might
++		 * be a few stable_nodes left over, still pointing to struct
++		 * pages which have been offlined: prune those from the tree.
++		 */
++		while ((stable_node = uksm_check_stable_tree(mn->start_pfn,
++					mn->start_pfn + mn->nr_pages)) != NULL)
++			remove_node_from_stable_tree(stable_node, 1, 1);
++		/* fallthrough */
++
++	case MEM_CANCEL_OFFLINE:
++		mutex_unlock(&uksm_thread_mutex);
++		break;
++	}
++	return NOTIFY_OK;
++}
++#endif /* CONFIG_MEMORY_HOTREMOVE */
++
++#ifdef CONFIG_SYSFS
++/*
++ * This all compiles without CONFIG_SYSFS, but is a waste of space.
++ */
++
++#define UKSM_ATTR_RO(_name) \
++	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
++#define UKSM_ATTR(_name) \
++	static struct kobj_attribute _name##_attr = \
++		__ATTR(_name, 0644, _name##_show, _name##_store)
++
++static ssize_t max_cpu_percentage_show(struct kobject *kobj,
++				    struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%u\n", uksm_max_cpu_percentage);
++}
++
++static ssize_t max_cpu_percentage_store(struct kobject *kobj,
++				     struct kobj_attribute *attr,
++				     const char *buf, size_t count)
++{
++	unsigned long max_cpu_percentage;
++	int err;
++
++	err = kstrtoul(buf, 10, &max_cpu_percentage);
++	if (err || max_cpu_percentage > 100)
++		return -EINVAL;
++
++	if (max_cpu_percentage == 100)
++		max_cpu_percentage = 99;
++	else if (max_cpu_percentage < 10)
++		max_cpu_percentage = 10;
++
++	uksm_max_cpu_percentage = max_cpu_percentage;
++
++	return count;
++}
++UKSM_ATTR(max_cpu_percentage);
++
++static ssize_t sleep_millisecs_show(struct kobject *kobj,
++				    struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%u\n", jiffies_to_msecs(uksm_sleep_jiffies));
++}
++
++static ssize_t sleep_millisecs_store(struct kobject *kobj,
++				     struct kobj_attribute *attr,
++				     const char *buf, size_t count)
++{
++	unsigned long msecs;
++	int err;
++
++	err = kstrtoul(buf, 10, &msecs);
++	if (err || msecs > MSEC_PER_SEC)
++		return -EINVAL;
++
++	uksm_sleep_jiffies = msecs_to_jiffies(msecs);
++	uksm_sleep_saved = uksm_sleep_jiffies;
++
++	return count;
++}
++UKSM_ATTR(sleep_millisecs);
++
++
++static ssize_t cpu_governor_show(struct kobject *kobj,
++				  struct kobj_attribute *attr, char *buf)
++{
++	int n = sizeof(uksm_cpu_governor_str) / sizeof(char *);
++	int i;
++
++	buf[0] = '\0';
++	for (i = 0; i < n ; i++) {
++		if (uksm_cpu_governor == i)
++			strcat(buf, "[");
++
++		strcat(buf, uksm_cpu_governor_str[i]);
++
++		if (uksm_cpu_governor == i)
++			strcat(buf, "]");
++
++		strcat(buf, " ");
++	}
++	strcat(buf, "\n");
++
++	return strlen(buf);
++}
++
++static inline void init_performance_values(void)
++{
++	int i;
++	struct scan_rung *rung;
++	struct uksm_cpu_preset_s *preset = uksm_cpu_preset + uksm_cpu_governor;
++
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		rung = uksm_scan_ladder + i;
++		rung->cpu_ratio = preset->cpu_ratio[i];
++		rung->cover_msecs = preset->cover_msecs[i];
++	}
++
++	uksm_max_cpu_percentage = preset->max_cpu;
++}
++
++static ssize_t cpu_governor_store(struct kobject *kobj,
++				   struct kobj_attribute *attr,
++				   const char *buf, size_t count)
++{
++	int n = sizeof(uksm_cpu_governor_str) / sizeof(char *);
++
++	for (n--; n >= 0 ; n--) {
++		if (!strncmp(buf, uksm_cpu_governor_str[n],
++			     strlen(uksm_cpu_governor_str[n])))
++			break;
++	}
++
++	if (n < 0)
++		return -EINVAL;
++	else
++		uksm_cpu_governor = n;
++
++	init_performance_values();
++
++	return count;
++}
++UKSM_ATTR(cpu_governor);
++
++static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
++			char *buf)
++{
++	return sprintf(buf, "%u\n", uksm_run);
++}
++
++static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
++			 const char *buf, size_t count)
++{
++	int err;
++	unsigned long flags;
++
++	err = kstrtoul(buf, 10, &flags);
++	if (err || flags > UINT_MAX)
++		return -EINVAL;
++	if (flags > UKSM_RUN_MERGE)
++		return -EINVAL;
++
++	mutex_lock(&uksm_thread_mutex);
++	if (uksm_run != flags)
++		uksm_run = flags;
++	mutex_unlock(&uksm_thread_mutex);
++
++	if (flags & UKSM_RUN_MERGE)
++		wake_up_interruptible(&uksm_thread_wait);
++
++	return count;
++}
++UKSM_ATTR(run);
++
++static ssize_t abundant_threshold_show(struct kobject *kobj,
++				     struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%u\n", uksm_abundant_threshold);
++}
++
++static ssize_t abundant_threshold_store(struct kobject *kobj,
++				      struct kobj_attribute *attr,
++				      const char *buf, size_t count)
++{
++	int err;
++	unsigned long flags;
++
++	err = kstrtoul(buf, 10, &flags);
++	if (err || flags > 99)
++		return -EINVAL;
++
++	uksm_abundant_threshold = flags;
++
++	return count;
++}
++UKSM_ATTR(abundant_threshold);
++
++static ssize_t thrash_threshold_show(struct kobject *kobj,
++				     struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%u\n", uksm_thrash_threshold);
++}
++
++static ssize_t thrash_threshold_store(struct kobject *kobj,
++				      struct kobj_attribute *attr,
++				      const char *buf, size_t count)
++{
++	int err;
++	unsigned long flags;
++
++	err = kstrtoul(buf, 10, &flags);
++	if (err || flags > 99)
++		return -EINVAL;
++
++	uksm_thrash_threshold = flags;
++
++	return count;
++}
++UKSM_ATTR(thrash_threshold);
++
++static ssize_t cpu_ratios_show(struct kobject *kobj,
++			       struct kobj_attribute *attr, char *buf)
++{
++	int i, size;
++	struct scan_rung *rung;
++	char *p = buf;
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		rung = &uksm_scan_ladder[i];
++
++		if (rung->cpu_ratio > 0)
++			size = sprintf(p, "%d ", rung->cpu_ratio);
++		else
++			size = sprintf(p, "MAX/%d ",
++					TIME_RATIO_SCALE / -rung->cpu_ratio);
++
++		p += size;
++	}
++
++	*p++ = '\n';
++	*p = '\0';
++
++	return p - buf;
++}
++
++static ssize_t cpu_ratios_store(struct kobject *kobj,
++				      struct kobj_attribute *attr,
++				      const char *buf, size_t count)
++{
++	int i, cpuratios[SCAN_LADDER_SIZE], err;
++	unsigned long value;
++	struct scan_rung *rung;
++	char *p, *end = NULL, *buffer;
++
++	buffer = p = kzalloc(count + 1, GFP_KERNEL);	/* +1 keeps the copy NUL-terminated */
++	if (!p)
++		return -ENOMEM;
++
++	memcpy(p, buf, count);
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		if (i != SCAN_LADDER_SIZE - 1) {
++			end = strchr(p, ' ');
++			if (!end) {
++				kfree(buffer);
++				return -EINVAL;
++			}
++
++			*end = '\0';
++		}
++
++		if (strstr(p, "MAX/")) {
++			p = strchr(p, '/') + 1;
++			err = kstrtoul(p, 10, &value);
++			if (err || value > TIME_RATIO_SCALE || !value) {
++				kfree(buffer);
++				return -EINVAL;
++			}
++
++			cpuratios[i] = -(int) (TIME_RATIO_SCALE / value);
++		} else {
++			err = kstrtoul(p, 10, &value);
++			if (err || value > TIME_RATIO_SCALE || !value) {
++				kfree(buffer);
++				return -EINVAL;
++			}
++
++			cpuratios[i] = value;
++		}
++
++		p = end + 1;
++	}
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		rung = &uksm_scan_ladder[i];
++
++		rung->cpu_ratio = cpuratios[i];
++	}
++
++	kfree(buffer);
++
++	return count;
++}
++UKSM_ATTR(cpu_ratios);
++
++static ssize_t eval_intervals_show(struct kobject *kobj,
++			       struct kobj_attribute *attr, char *buf)
++{
++	int i, size;
++	struct scan_rung *rung;
++	char *p = buf;
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		rung = &uksm_scan_ladder[i];
++		size = sprintf(p, "%u ", rung->cover_msecs);
++		p += size;
++	}
++
++	*p++ = '\n';
++	*p = '\0';
++
++	return p - buf;
++}
++
++static ssize_t eval_intervals_store(struct kobject *kobj,
++				      struct kobj_attribute *attr,
++				      const char *buf, size_t count)
++{
++	int i, err;
++	unsigned long values[SCAN_LADDER_SIZE];
++	struct scan_rung *rung;
++	char *p, *end = NULL, *buffer;
++	ssize_t ret = count;
++
++	buffer = p = kzalloc(count + 2, GFP_KERNEL);
++	if (!p)
++		return -ENOMEM;
++
++	memcpy(p, buf, count);
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		if (i != SCAN_LADDER_SIZE - 1) {
++			end = strchr(p, ' ');
++			if (!end) {
++				ret = -EINVAL;
++				goto out;
++			}
++
++			*end = '\0';
++		}
++
++		err = kstrtoul(p, 10, &values[i]);
++		if (err) {
++			ret = -EINVAL;
++			goto out;
++		}
++
++		p = end + 1;
++	}
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		rung = &uksm_scan_ladder[i];
++
++		rung->cover_msecs = values[i];
++	}
++
++out:
++	kfree(buffer);	/* p was advanced while parsing; free the original allocation */
++	return ret;
++}
++UKSM_ATTR(eval_intervals);
++
++static ssize_t ema_per_page_time_show(struct kobject *kobj,
++				 struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%lu\n", uksm_ema_page_time);
++}
++UKSM_ATTR_RO(ema_per_page_time);
++
++static ssize_t pages_shared_show(struct kobject *kobj,
++				 struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%lu\n", uksm_pages_shared);
++}
++UKSM_ATTR_RO(pages_shared);
++
++static ssize_t pages_sharing_show(struct kobject *kobj,
++				  struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%lu\n", uksm_pages_sharing);
++}
++UKSM_ATTR_RO(pages_sharing);
++
++static ssize_t pages_unshared_show(struct kobject *kobj,
++				   struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%lu\n", uksm_pages_unshared);
++}
++UKSM_ATTR_RO(pages_unshared);
++
++static ssize_t full_scans_show(struct kobject *kobj,
++			       struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%llu\n", fully_scanned_round);
++}
++UKSM_ATTR_RO(full_scans);
++
++static ssize_t pages_scanned_show(struct kobject *kobj,
++				  struct kobj_attribute *attr, char *buf)
++{
++	unsigned long base = 0;
++	u64 delta, ret;
++
++	if (pages_scanned_stored) {
++		base = pages_scanned_base;
++		ret = pages_scanned_stored;
++		delta = uksm_pages_scanned >> base;
++		if (CAN_OVERFLOW_U64(ret, delta)) {
++			ret >>= 1;
++			delta >>= 1;
++			base++;
++			ret += delta;
++		}
++	} else {
++		ret = uksm_pages_scanned;
++	}
++
++	while (ret > ULONG_MAX) {
++		ret >>= 1;
++		base++;
++	}
++
++	if (base)
++		return sprintf(buf, "%lu * 2^%lu\n", (unsigned long)ret, base);
++	else
++		return sprintf(buf, "%lu\n", (unsigned long)ret);
++}
++UKSM_ATTR_RO(pages_scanned);
++
++static ssize_t hash_strength_show(struct kobject *kobj,
++				  struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%lu\n", hash_strength);
++}
++UKSM_ATTR_RO(hash_strength);
++
++static ssize_t sleep_times_show(struct kobject *kobj,
++				  struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%llu\n", uksm_sleep_times);
++}
++UKSM_ATTR_RO(sleep_times);
++
++
++static struct attribute *uksm_attrs[] = {
++	&max_cpu_percentage_attr.attr,
++	&sleep_millisecs_attr.attr,
++	&cpu_governor_attr.attr,
++	&run_attr.attr,
++	&ema_per_page_time_attr.attr,
++	&pages_shared_attr.attr,
++	&pages_sharing_attr.attr,
++	&pages_unshared_attr.attr,
++	&full_scans_attr.attr,
++	&pages_scanned_attr.attr,
++	&hash_strength_attr.attr,
++	&sleep_times_attr.attr,
++	&thrash_threshold_attr.attr,
++	&abundant_threshold_attr.attr,
++	&cpu_ratios_attr.attr,
++	&eval_intervals_attr.attr,
++	NULL,
++};
++
++static struct attribute_group uksm_attr_group = {
++	.attrs = uksm_attrs,
++	.name = "uksm",
++};
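++
++/*
++ * The group above is registered on mm_kobj under the name "uksm" (see
++ * uksm_init() below), so these tunables show up as files under
++ * /sys/kernel/mm/uksm/ -- e.g. "run" toggles UKSM_RUN_MERGE and wakes the
++ * scanner, "sleep_millisecs" sets the sleep period, and "cpu_governor"
++ * selects one of the preset performance profiles.
++ */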
++#endif /* CONFIG_SYSFS */
++
++static inline void init_scan_ladder(void)
++{
++	int i;
++	struct scan_rung *rung;
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		rung = uksm_scan_ladder + i;
++		slot_tree_init_root(&rung->vma_root);
++	}
++
++	init_performance_values();
++	uksm_calc_scan_pages();
++}
++
++static inline int cal_positive_negative_costs(void)
++{
++	struct page *p1, *p2;
++	unsigned char *addr1, *addr2;
++	unsigned long i, time_start, hash_cost;
++	unsigned long loopnum = 0;
++
++	/* IMPORTANT: volatile is needed to prevent over-optimization by gcc. */
++	volatile u32 hash;
++	volatile int ret;
++
++	p1 = alloc_page(GFP_KERNEL);
++	if (!p1)
++		return -ENOMEM;
++
++	p2 = alloc_page(GFP_KERNEL);
++	if (!p2) {
++		__free_page(p1);
++		return -ENOMEM;
++	}
++
++	addr1 = kmap_atomic(p1);
++	addr2 = kmap_atomic(p2);
++	memset(addr1, prandom_u32(), PAGE_SIZE);
++	memcpy(addr2, addr1, PAGE_SIZE);
++
++	/* make sure that the two pages differ in the last byte */
++	addr2[PAGE_SIZE-1] = ~addr2[PAGE_SIZE-1];
++	kunmap_atomic(addr2);
++	kunmap_atomic(addr1);
++
++	time_start = jiffies;
++	while (jiffies - time_start < 100) {
++		for (i = 0; i < 100; i++)
++			hash = page_hash(p1, HASH_STRENGTH_FULL, 0);
++		loopnum += 100;
++	}
++	hash_cost = (jiffies - time_start);
++
++	time_start = jiffies;
++	for (i = 0; i < loopnum; i++)
++		ret = pages_identical_with_cost(p1, p2);
++	memcmp_cost = HASH_STRENGTH_FULL * (jiffies - time_start);
++	memcmp_cost /= hash_cost;
++	pr_info("UKSM: relative memcmp_cost = %lu "
++		"hash=%u cmp_ret=%d.\n",
++		memcmp_cost, hash, ret);
++
++	__free_page(p1);
++	__free_page(p2);
++	return 0;
++}
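++
++/*
++ * The calibration above times page_hash() at full strength against
++ * pages_identical_with_cost() on two pages differing only in their last
++ * byte; memcmp_cost therefore ends up expressed in units of
++ * per-strength-step hashing time (HASH_STRENGTH_FULL * memcmp time /
++ * full-strength hash time).
++ */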
++
++static int init_zeropage_hash_table(void)
++{
++	struct page *page;
++	char *addr;
++	int i;
++
++	page = alloc_page(GFP_KERNEL);
++	if (!page)
++		return -ENOMEM;
++
++	addr = kmap_atomic(page);
++	memset(addr, 0, PAGE_SIZE);
++	kunmap_atomic(addr);
++
++	zero_hash_table = kmalloc_array(HASH_STRENGTH_MAX, sizeof(u32),
++		GFP_KERNEL);
++	if (!zero_hash_table) {
++		__free_page(page);
++		return -ENOMEM;
++	}
++
++	for (i = 0; i < HASH_STRENGTH_MAX; i++)
++		zero_hash_table[i] = page_hash(page, i, 0);
++
++	__free_page(page);
++
++	return 0;
++}
++
++static inline int init_random_sampling(void)
++{
++	unsigned long i;
++
++	random_nums = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if (!random_nums)
++		return -ENOMEM;
++
++	for (i = 0; i < HASH_STRENGTH_FULL; i++)
++		random_nums[i] = i;
++
++	for (i = 0; i < HASH_STRENGTH_FULL; i++) {
++		unsigned long rand_range, swap_index, tmp;
++
++		rand_range = HASH_STRENGTH_FULL - i;
++		swap_index = i + prandom_u32() % rand_range;
++		tmp = random_nums[i];
++		random_nums[i] =  random_nums[swap_index];
++		random_nums[swap_index] = tmp;
++	}
++
++	rshash_state.state = RSHASH_NEW;
++	rshash_state.below_count = 0;
++	rshash_state.lookup_window_index = 0;
++
++	return cal_positive_negative_costs();
++}
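++
++/*
++ * The swap loop above is a Fisher-Yates shuffle of the identity
++ * permutation, so random_nums ends up as a uniformly random ordering of
++ * the HASH_STRENGTH_FULL sample positions.
++ */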
++
++static int __init uksm_slab_init(void)
++{
++	rmap_item_cache = UKSM_KMEM_CACHE(rmap_item, 0);
++	if (!rmap_item_cache)
++		goto out;
++
++	stable_node_cache = UKSM_KMEM_CACHE(stable_node, 0);
++	if (!stable_node_cache)
++		goto out_free1;
++
++	node_vma_cache = UKSM_KMEM_CACHE(node_vma, 0);
++	if (!node_vma_cache)
++		goto out_free2;
++
++	vma_slot_cache = UKSM_KMEM_CACHE(vma_slot, 0);
++	if (!vma_slot_cache)
++		goto out_free3;
++
++	tree_node_cache = UKSM_KMEM_CACHE(tree_node, 0);
++	if (!tree_node_cache)
++		goto out_free4;
++
++	return 0;
++
++out_free4:
++	kmem_cache_destroy(vma_slot_cache);
++out_free3:
++	kmem_cache_destroy(node_vma_cache);
++out_free2:
++	kmem_cache_destroy(stable_node_cache);
++out_free1:
++	kmem_cache_destroy(rmap_item_cache);
++out:
++	return -ENOMEM;
++}
++
++static void __init uksm_slab_free(void)
++{
++	kmem_cache_destroy(stable_node_cache);
++	kmem_cache_destroy(rmap_item_cache);
++	kmem_cache_destroy(node_vma_cache);
++	kmem_cache_destroy(vma_slot_cache);
++	kmem_cache_destroy(tree_node_cache);
++}
++
++/* Common interface to ksm, but it behaves differently here. */
++int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
++		unsigned long end, int advice, unsigned long *vm_flags)
++{
++	int err;
++
++	switch (advice) {
++	case MADV_MERGEABLE:
++		return 0;		/* just ignore the advice */
++
++	case MADV_UNMERGEABLE:
++		if (!(*vm_flags & VM_MERGEABLE) || !uksm_flags_can_scan(*vm_flags))
++			return 0;		/* just ignore the advice */
++
++		if (vma->anon_vma) {
++			err = unmerge_uksm_pages(vma, start, end);
++			if (err)
++				return err;
++		}
++
++		uksm_remove_vma(vma);
++		*vm_flags &= ~VM_MERGEABLE;
++		break;
++	}
++
++	return 0;
++}
++
++/* Common interface to ksm, actually the same. */
++struct page *ksm_might_need_to_copy(struct page *page,
++			struct vm_area_struct *vma, unsigned long address)
++{
++	struct anon_vma *anon_vma = page_anon_vma(page);
++	struct page *new_page;
++
++	if (PageKsm(page)) {
++		if (page_stable_node(page))
++			return page;	/* no need to copy it */
++	} else if (!anon_vma) {
++		return page;		/* no need to copy it */
++	} else if (anon_vma->root == vma->anon_vma->root &&
++		 page->index == linear_page_index(vma, address)) {
++		return page;		/* still no need to copy it */
++	}
++	if (!PageUptodate(page))
++		return page;		/* let do_swap_page report the error */
++
++	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
++	if (new_page) {
++		copy_user_highpage(new_page, page, address, vma);
++
++		SetPageDirty(new_page);
++		__SetPageUptodate(new_page);
++		__SetPageLocked(new_page);
++	}
++
++	return new_page;
++}
++
++/* Copied from mm/ksm.c and required since kernel 5.1 */
++bool reuse_ksm_page(struct page *page,
++		    struct vm_area_struct *vma,
++		    unsigned long address)
++{
++#ifdef CONFIG_DEBUG_VM
++	if (WARN_ON(is_zero_pfn(page_to_pfn(page))) ||
++			WARN_ON(!page_mapped(page)) ||
++			WARN_ON(!PageLocked(page))) {
++		dump_page(page, "reuse_ksm_page");
++		return false;
++	}
++#endif
++
++	if (PageSwapCache(page) || !page_stable_node(page))
++		return false;
++	/* Prohibit parallel get_ksm_page() */
++	if (!page_ref_freeze(page, 1))
++		return false;
++
++	page_move_anon_rmap(page, vma);
++	page->index = linear_page_index(vma, address);
++	page_ref_unfreeze(page, 1);
++
++	return true;
++}
++
++static int __init uksm_init(void)
++{
++	struct task_struct *uksm_thread;
++	int err;
++
++	uksm_sleep_jiffies = msecs_to_jiffies(100);
++	uksm_sleep_saved = uksm_sleep_jiffies;
++
++	slot_tree_init();
++	init_scan_ladder();
++
++
++	err = init_random_sampling();
++	if (err)
++		goto out_free2;
++
++	err = uksm_slab_init();
++	if (err)
++		goto out_free1;
++
++	err = init_zeropage_hash_table();
++	if (err)
++		goto out_free0;
++
++	uksm_thread = kthread_run(uksm_scan_thread, NULL, "uksmd");
++	if (IS_ERR(uksm_thread)) {
++		pr_err("uksm: creating kthread failed\n");
++		err = PTR_ERR(uksm_thread);
++		goto out_free;
++	}
++
++#ifdef CONFIG_SYSFS
++	err = sysfs_create_group(mm_kobj, &uksm_attr_group);
++	if (err) {
++		pr_err("uksm: register sysfs failed\n");
++		kthread_stop(uksm_thread);
++		goto out_free;
++	}
++#else
++	uksm_run = UKSM_RUN_MERGE;	/* no way for user to start it */
++
++#endif /* CONFIG_SYSFS */
++
++#ifdef CONFIG_MEMORY_HOTREMOVE
++	/*
++	 * Choose a high priority since the callback takes uksm_thread_mutex:
++	 * later callbacks could only be taking locks which nest within that.
++	 */
++	hotplug_memory_notifier(uksm_memory_callback, 100);
++#endif
++	return 0;
++
++out_free:
++	kfree(zero_hash_table);
++out_free0:
++	uksm_slab_free();
++out_free1:
++	kfree(random_nums);
++out_free2:
++	kfree(uksm_scan_ladder);
++	return err;
++}
++
++#ifdef MODULE
++subsys_initcall(uksm_init);
++#else
++late_initcall(uksm_init);
++#endif
++
+diff --git a/mm/vmstat.c b/mm/vmstat.c
+index 3fb23a21f6dd..ac0c5fccf749 100644
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -1173,6 +1173,9 @@ const char * const vmstat_text[] = {
+ 	"nr_foll_pin_acquired",
+ 	"nr_foll_pin_released",
+ 
++#ifdef CONFIG_UKSM
++	"nr_uksm_zero_pages",
++#endif
+ 	/* enum writeback_stat_item counters */
+ 	"nr_dirty_threshold",
+ 	"nr_dirty_background_threshold",
diff --git a/znver1-common.config b/znver1-common.config
index e2971cc..ac46292 100644
--- a/znver1-common.config
+++ b/znver1-common.config
@@ -1,8 +1,10 @@
 # CONFIG_GENERIC_CPU is not set
 CONFIG_MZEN=y
+# CONFIG_X86_MCE_INTEL is not set
 CONFIG_CPU_SUP_AMD=y
 # CONFIG_CPU_SUP_INTEL is not set
 # CONFIG_CPU_SUP_CENTAUR is not set
+# CONFIG_MICROCODE_INTEL is not set
 # CONFIG_INTEL_ATOMISP is not set
 # CONFIG_INTEL_SOC_PMIC_CHTWC is not set
 # CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH is not set
