$ git diff --patch-with-stat --summary 968aeefc498fb4d7b47f4f3dacb02cc8f14476cc..87bc2830c2765adc879712e969d1746e773ec78d
.abf.yml | 2 +-
...n-k10temp-Remove-support-for-displaying-v.patch | 188 +
PATCH-v14-06-10-fs-ntfs3-Add-compression.patch | 2765 --------
...01-10-fs-ntfs3-Add-headers-and-misc-files.patch | 147 +-
...s-ntfs3-Add-initialization-of-super-block.patch | 115 +-
...ch => PATCH-v16-03-10-fs-ntfs3-Add-bitmap.patch | 44 +-
...s3-Add-file-operations-and-implementation.patch | 592 +-
...-v16-05-10-fs-ntfs3-Add-attrib-operations.patch | 609 +-
PATCH-v16-06-10-fs-ntfs3-Add-compression.patch | 2131 ++++++
...PATCH-v16-07-10-fs-ntfs3-Add-NTFS-journal.patch | 48 +-
...-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch | 75 +-
...3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch | 48 +-
... PATCH-v16-10-10-fs-ntfs3-Add-MAINTAINERS.patch | 55 +-
aarch64-desktop-omv-defconfig | 3 +
aarch64-server-omv-defconfig | 3 +
armv7hnl-desktop-omv-defconfig | 3 +
armv7hnl-server-omv-defconfig | 3 +
i686-desktop-gcc-omv-defconfig | 3 +
i686-server-gcc-omv-defconfig | 3 +
kernel-release.spec | 46 +-
...riot_Viper_VPN100-QUIRK_IGNORE_DEV_SUBNQN.patch | 13 +
uksm-5.10.patch | 6935 ++++++++++++++++++++
x86_64-desktop-gcc-omv-defconfig | 3 +
x86_64-server-gcc-omv-defconfig | 3 +
x86_64-znver-desktop-gcc-omv-defconfig | 3 +
x86_64-znver-server-gcc-omv-defconfig | 3 +
26 files changed, 10478 insertions(+), 3365 deletions(-)
create mode 100644 0001-Revert-hwmon-k10temp-Remove-support-for-displaying-v.patch
delete mode 100644 PATCH-v14-06-10-fs-ntfs3-Add-compression.patch
rename PATCH-v14-01-10-fs-ntfs3-Add-headers-and-misc-files.patch => PATCH-v16-01-10-fs-ntfs3-Add-headers-and-misc-files.patch (93%)
rename PATCH-v14-02-10-fs-ntfs3-Add-initialization-of-super-block.patch => PATCH-v16-02-10-fs-ntfs3-Add-initialization-of-super-block.patch (98%)
rename PATCH-v14-03-10-fs-ntfs3-Add-bitmap.patch => PATCH-v16-03-10-fs-ntfs3-Add-bitmap.patch (96%)
rename PATCH-v14-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch => PATCH-v16-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch (94%)
rename PATCH-v14-05-10-fs-ntfs3-Add-attrib-operations.patch => PATCH-v16-05-10-fs-ntfs3-Add-attrib-operations.patch (86%)
create mode 100644 PATCH-v16-06-10-fs-ntfs3-Add-compression.patch
rename PATCH-v14-07-10-fs-ntfs3-Add-NTFS-journal.patch => PATCH-v16-07-10-fs-ntfs3-Add-NTFS-journal.patch (98%)
rename PATCH-v14-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch => PATCH-v16-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch (80%)
rename PATCH-v14-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch => PATCH-v16-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch (68%)
rename PATCH-v14-10-10-fs-ntfs3-Add-MAINTAINERS.patch => PATCH-v16-10-10-fs-ntfs3-Add-MAINTAINERS.patch (63%)
create mode 100644 nvme-Patriot_Viper_VPN100-QUIRK_IGNORE_DEV_SUBNQN.patch
create mode 100644 uksm-5.10.patch
diff --git a/.abf.yml b/.abf.yml
index 6464e1d..25992a0 100644
--- a/.abf.yml
+++ b/.abf.yml
@@ -2,4 +2,4 @@ sources:
extra-wifi-drivers-20200301.tar.zst: 3390c738c7d91250714ce0f88d26371e93bc40b8
saa716x-driver.tar.xz: f9b6ef1cd6f1f71f53d9a8aadfba2cf6b5c3d7b6
linux-5.10.tar.xz: be0b909f1fbb760cc2d5cf146e1da3b2af0cf899
- patch-5.10.3.xz: 7ddbbe8a49fb5ad8f2ed7aa70c4efbfdd4685bc1
+ patch-5.10.4.xz: 5f98ab1690ac9f6a821c3ad77226a9032a383dae
diff --git a/0001-Revert-hwmon-k10temp-Remove-support-for-displaying-v.patch b/0001-Revert-hwmon-k10temp-Remove-support-for-displaying-v.patch
new file mode 100644
index 0000000..a750b09
--- /dev/null
+++ b/0001-Revert-hwmon-k10temp-Remove-support-for-displaying-v.patch
@@ -0,0 +1,188 @@
+From cb04c523c759e3704c6d8f81ad6fec7c94e09f69 Mon Sep 17 00:00:00 2001
+From: Gabriel Craciunescu <nix.or.die@gmail.com>
+Date: Mon, 28 Dec 2020 16:23:11 +0100
+Subject: [PATCH] Revert "hwmon: (k10temp) Remove support for displaying
+ voltage and current on Zen CPUs"
+
+This reverts commit 0a4e668b5d52eed8026f5d717196b02b55fb2dc6.
+
+While not perfect ( and in this case caused by wrong code ), is better
+than having nothing. Bugs could be fixed, and if not, such CPUs could be blacklisted
+from getting the voltage informations..
+---
+ drivers/hwmon/k10temp.c | 98 +++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 98 insertions(+)
+
+diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
+index 3bc2551577a3..a250481b5a97 100644
+--- a/drivers/hwmon/k10temp.c
++++ b/drivers/hwmon/k10temp.c
+@@ -11,6 +11,13 @@
+ * convert raw register values is from https://github.com/ocerman/zenpower.
+ * The information is not confirmed from chip datasheets, but experiments
+ * suggest that it provides reasonable temperature values.
++ * - Register addresses to read chip voltage and current are also from
++ * https://github.com/ocerman/zenpower, and not confirmed from chip
++ * datasheets. Current calibration is board specific and not typically
++ * shared by board vendors. For this reason, current values are
+ * normalized to report 1A/LSB for core current and 0.25A/LSB for SoC
++ * current. Reported values can be adjusted using the sensors configuration
++ * file.
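+ *
+ * For example (values here are illustrative only), a board whose core
+ * current is known to be 1.5x the normalized reading could rescale it
+ * with a sensors(1) configuration entry such as:
+ *
+ *   chip "k10temp-pci-*"
+ *      compute curr1  @*1.5, @/1.5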
+ */
+
+ #include <linux/bitops.h>
+@@ -102,7 +109,10 @@ struct k10temp_data {
+ int temp_offset;
+ u32 temp_adjust_mask;
+ u32 show_temp;
++ u32 svi_addr[2];
+ bool is_zen;
++ bool show_current;
++ int cfactor[2];
+ };
+
+ #define TCTL_BIT 0
+@@ -127,6 +137,16 @@ static const struct tctl_offset tctl_offset_table[] = {
+ { 0x17, "AMD Ryzen Threadripper 29", 27000 }, /* 29{20,50,70,90}[W]X */
+ };
+
++static bool is_threadripper(void)
++{
++ return strstr(boot_cpu_data.x86_model_id, "Threadripper");
++}
++
++static bool is_epyc(void)
++{
++ return strstr(boot_cpu_data.x86_model_id, "EPYC");
++}
++
+ static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
+ {
+ pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
+@@ -191,6 +211,16 @@ static const char *k10temp_temp_label[] = {
+ "Tccd8",
+ };
+
++static const char *k10temp_in_label[] = {
++ "Vcore",
++ "Vsoc",
++};
++
++static const char *k10temp_curr_label[] = {
++ "Icore",
++ "Isoc",
++};
++
+ static int k10temp_read_labels(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **str)
+@@ -199,6 +229,50 @@ static int k10temp_read_labels(struct device *dev,
+ case hwmon_temp:
+ *str = k10temp_temp_label[channel];
+ break;
++ case hwmon_in:
++ *str = k10temp_in_label[channel];
++ break;
++ case hwmon_curr:
++ *str = k10temp_curr_label[channel];
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++ return 0;
++}
++
++static int k10temp_read_curr(struct device *dev, u32 attr, int channel,
++ long *val)
++{
++ struct k10temp_data *data = dev_get_drvdata(dev);
++ u32 regval;
++
++ switch (attr) {
++ case hwmon_curr_input:
++ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ data->svi_addr[channel], &regval);
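+ /* Scale the raw 8-bit telemetry value to milliamps; with the
+ * normalized coefficients (1 A/LSB for Icore), a raw reading
+ * of 10 is reported as 10000 mA.
+ */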
++ *val = DIV_ROUND_CLOSEST(data->cfactor[channel] *
++ (regval & 0xff),
++ 1000);
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++ return 0;
++}
++
++static int k10temp_read_in(struct device *dev, u32 attr, int channel, long *val)
++{
++ struct k10temp_data *data = dev_get_drvdata(dev);
++ u32 regval;
++
++ switch (attr) {
++ case hwmon_in_input:
++ amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+ data->svi_addr[channel], &regval);
++ regval = (regval >> 16) & 0xff;
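+ /* SVI2 telemetry encodes the voltage as 1.55 V - 6.25 mV * regval;
+ * hwmon reports millivolts, hence the scaling below.
+ */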
++ *val = DIV_ROUND_CLOSEST(155000 - regval * 625, 100);
++ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+@@ -257,6 +331,10 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
+ switch (type) {
+ case hwmon_temp:
+ return k10temp_read_temp(dev, attr, channel, val);
++ case hwmon_in:
++ return k10temp_read_in(dev, attr, channel, val);
++ case hwmon_curr:
++ return k10temp_read_curr(dev, attr, channel, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+@@ -305,6 +383,11 @@ static umode_t k10temp_is_visible(const void *_data,
+ return 0;
+ }
+ break;
++ case hwmon_in:
++ case hwmon_curr:
++ if (!data->show_current)
++ return 0;
++ break;
+ default:
+ return 0;
+ }
+@@ -434,10 +517,20 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ case 0x8: /* Zen+ */
+ case 0x11: /* Zen APU */
+ case 0x18: /* Zen+ APU */
++ data->show_current = !is_threadripper() && !is_epyc();
++ data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE0;
++ data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE1;
++ data->cfactor[0] = F17H_M01H_CFACTOR_ICORE;
++ data->cfactor[1] = F17H_M01H_CFACTOR_ISOC;
+ k10temp_get_ccd_support(pdev, data, 4);
+ break;
+ case 0x31: /* Zen2 Threadripper */
+ case 0x71: /* Zen2 */
++ data->show_current = !is_threadripper() && !is_epyc();
++ data->cfactor[0] = F17H_M31H_CFACTOR_ICORE;
++ data->cfactor[1] = F17H_M31H_CFACTOR_ISOC;
++ data->svi_addr[0] = F17H_M31H_SVI_TEL_PLANE0;
++ data->svi_addr[1] = F17H_M31H_SVI_TEL_PLANE1;
+ k10temp_get_ccd_support(pdev, data, 8);
+ break;
+ }
+@@ -449,6 +542,11 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x0 ... 0x1: /* Zen3 */
++ data->show_current = true;
++ data->svi_addr[0] = F19H_M01_SVI_TEL_PLANE0;
++ data->svi_addr[1] = F19H_M01_SVI_TEL_PLANE1;
++ data->cfactor[0] = F19H_M01H_CFACTOR_ICORE;
++ data->cfactor[1] = F19H_M01H_CFACTOR_ISOC;
+ k10temp_get_ccd_support(pdev, data, 8);
+ break;
+ }
+--
+2.29.2
+
diff --git a/PATCH-v14-06-10-fs-ntfs3-Add-compression.patch b/PATCH-v14-06-10-fs-ntfs3-Add-compression.patch
deleted file mode 100644
index f2cb581..0000000
--- a/PATCH-v14-06-10-fs-ntfs3-Add-compression.patch
+++ /dev/null
@@ -1,2765 +0,0 @@
-From mboxrd@z Thu Jan 1 00:00:00 1970
-Return-Path: <linux-kernel-owner@kernel.org>
-X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
- aws-us-west-2-korg-lkml-1.web.codeaurora.org
-X-Spam-Level:
-X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
- DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,
- INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
- USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0
-Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id 0066FC19437
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:26 +0000 (UTC)
-Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id C17AE22C97
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:25 +0000 (UTC)
-Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730793AbgLDPsz (ORCPT
- <rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:48:55 -0500
-Received: from relayfre-01.paragon-software.com ([176.12.100.13]:55260 "EHLO
- relayfre-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730725AbgLDPsx (ORCPT
- <rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:48:53 -0500
-Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relayfre-01.paragon-software.com (Postfix) with ESMTPS id F23BC1D46;
- Fri, 4 Dec 2020 18:48:07 +0300 (MSK)
-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096888;
- bh=olF7t09va2dJgyB7TKJjOcTMfHeDodUuFOGcQaLt2Jk=;
- h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=ey3LfTWtqAWEe/vrWDwuRmgmgsULDDF+eulYphpjiTPId+vOHQVnHHNBZl2GZVoBT
- 7qfcU73H84w/ErrZvT27bzKw5S/YNxeSt9eW89XGLe9aZeAFWNFB7INxYHrGuO3RdA
- Phsw0jwoNJ3RDa1xFWBGDjJGpuOkpvma/sx9VhCQ=
-Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
- vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
- (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:07 +0300
-From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-To: <linux-fsdevel@vger.kernel.org>
-CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
- <pali@kernel.org>, <dsterba@suse.cz>, <aaptel@suse.com>,
- <willy@infradead.org>, <rdunlap@infradead.org>, <joe@perches.com>,
- <mark@harmstone.com>, <nborisov@suse.com>,
- <linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
- <dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
- Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 06/10] fs/ntfs3: Add compression
-Date: Fri, 4 Dec 2020 18:45:56 +0300
-Message-ID: <20201204154600.1546096-7-almaz.alexandrovich@paragon-software.com>
-X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-MIME-Version: 1.0
-Content-Transfer-Encoding: 8bit
-Content-Type: text/plain
-X-Originating-IP: [172.30.114.105]
-X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
- vdlg-exch-02.paragon-software.com (172.30.1.105)
-Precedence: bulk
-List-ID: <linux-kernel.vger.kernel.org>
-X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-7-almaz.alexandrovich@paragon-software.com/>
-List-Archive: <https://lore.kernel.org/lkml/>
-List-Post: <mailto:linux-kernel@vger.kernel.org>
-
-This adds compression
-
-Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
----
- fs/ntfs3/lib/common_defs.h | 196 +++++++++++
- fs/ntfs3/lib/decompress_common.c | 314 +++++++++++++++++
- fs/ntfs3/lib/decompress_common.h | 558 +++++++++++++++++++++++++++++++
- fs/ntfs3/lib/lzx_common.c | 204 +++++++++++
- fs/ntfs3/lib/lzx_common.h | 31 ++
- fs/ntfs3/lib/lzx_constants.h | 113 +++++++
- fs/ntfs3/lib/lzx_decompress.c | 553 ++++++++++++++++++++++++++++++
- fs/ntfs3/lib/xpress_constants.h | 23 ++
- fs/ntfs3/lib/xpress_decompress.c | 165 +++++++++
- fs/ntfs3/lznt.c | 452 +++++++++++++++++++++++++
- 10 files changed, 2609 insertions(+)
- create mode 100644 fs/ntfs3/lib/common_defs.h
- create mode 100644 fs/ntfs3/lib/decompress_common.c
- create mode 100644 fs/ntfs3/lib/decompress_common.h
- create mode 100644 fs/ntfs3/lib/lzx_common.c
- create mode 100644 fs/ntfs3/lib/lzx_common.h
- create mode 100644 fs/ntfs3/lib/lzx_constants.h
- create mode 100644 fs/ntfs3/lib/lzx_decompress.c
- create mode 100644 fs/ntfs3/lib/xpress_constants.h
- create mode 100644 fs/ntfs3/lib/xpress_decompress.c
- create mode 100644 fs/ntfs3/lznt.c
-
-diff --git a/fs/ntfs3/lib/common_defs.h b/fs/ntfs3/lib/common_defs.h
-new file mode 100644
-index 000000000000..2114e37872fb
---- /dev/null
-+++ b/fs/ntfs3/lib/common_defs.h
-@@ -0,0 +1,196 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+/*
-+ * Copyright (C) 2012-2016 Eric Biggers
-+ *
-+ * Adapted for linux kernel by Alexander Mamaev:
-+ * - remove implementations of get_unaligned_
-+ * - remove SSE and AVX instructions
-+ * - assume GCC is always defined
-+ * - inlined aligned_malloc/aligned_free
-+ * - ISO C90
-+ * - linux kernel code style
-+ */
-+
-+#ifndef _COMMON_DEFS_H
-+#define _COMMON_DEFS_H
-+
-+#include <linux/string.h>
-+#include <linux/compiler.h>
-+#include <linux/types.h>
-+#include <linux/slab.h>
-+#include <asm/unaligned.h>
-+
-+
-+/* ========================================================================== */
-+/* Type definitions */
-+/* ========================================================================== */
-+
-+/*
-+ * Type of a machine word. 'unsigned long' would be logical, but that is only
-+ * 32 bits on x86_64 Windows. The same applies to 'uint_fast32_t'. So the best
-+ * we can do without a bunch of #ifdefs appears to be 'size_t'.
-+ */
-+
-+#define WORDBYTES sizeof(size_t)
-+#define WORDBITS (8 * WORDBYTES)
-+
-+/* ========================================================================== */
-+/* Compiler-specific definitions */
-+/* ========================================================================== */
-+
-+# define forceinline __always_inline
-+# define _aligned_attribute(n) __aligned(n)
-+# define bsr32(n) (31 - __builtin_clz(n))
-+# define bsr64(n) (63 - __builtin_clzll(n))
-+# define bsf32(n) __builtin_ctz(n)
-+# define bsf64(n) __builtin_ctzll(n)
-+
-+/* STATIC_ASSERT() - verify the truth of an expression at compilation time */
-+#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
-+
-+/* STATIC_ASSERT_ZERO() - verify the truth of an expression at compilation time
-+ * and also produce a result of value '0' to be used in constant expressions
-+ */
-+#define STATIC_ASSERT_ZERO(expr) ((int)sizeof(char[-!(expr)]))
-+
-+/* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
-+ * can be performed efficiently on the target platform.
-+ */
-+#if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
-+# define UNALIGNED_ACCESS_IS_FAST 1
-+#else
-+# define UNALIGNED_ACCESS_IS_FAST 0
-+#endif
-+
-+/* ========================================================================== */
-+/* Unaligned memory accesses */
-+/* ========================================================================== */
-+
-+#define load_word_unaligned(p) get_unaligned((const size_t *)(p))
-+#define store_word_unaligned(v, p) put_unaligned((v), (size_t *)(p))
-+
-+
-+/* ========================================================================== */
-+/* Bit scan functions */
-+/* ========================================================================== */
-+
-+/*
-+ * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
-+ * significant end) of the *most* significant 1 bit in the input value. The
-+ * input value must be nonzero!
-+ */
-+
-+#ifndef bsr32
-+static forceinline u32
-+bsr32(u32 v)
-+{
-+ u32 bit = 0;
-+
-+ while ((v >>= 1) != 0)
-+ bit++;
-+ return bit;
-+}
-+#endif
-+
-+#ifndef bsr64
-+static forceinline u32
-+bsr64(u64 v)
-+{
-+ u32 bit = 0;
-+
-+ while ((v >>= 1) != 0)
-+ bit++;
-+ return bit;
-+}
-+#endif
-+
-+static forceinline u32
-+bsrw(size_t v)
-+{
-+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
-+ if (WORDBITS == 32)
-+ return bsr32(v);
-+ else
-+ return bsr64(v);
-+}
-+
-+/*
-+ * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
-+ * significant end) of the *least* significant 1 bit in the input value. The
-+ * input value must be nonzero!
-+ */
-+
-+#ifndef bsf32
-+static forceinline u32
-+bsf32(u32 v)
-+{
-+ u32 bit;
-+
-+ for (bit = 0; !(v & 1); bit++, v >>= 1)
-+ ;
-+ return bit;
-+}
-+#endif
-+
-+#ifndef bsf64
-+static forceinline u32
-+bsf64(u64 v)
-+{
-+ u32 bit;
-+
-+ for (bit = 0; !(v & 1); bit++, v >>= 1)
-+ ;
-+ return bit;
-+}
-+#endif
-+
-+static forceinline u32
-+bsfw(size_t v)
-+{
-+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
-+ if (WORDBITS == 32)
-+ return bsf32(v);
-+ else
-+ return bsf64(v);
-+}
-+
-+/* Return the log base 2 of 'n', rounded up to the nearest integer. */
-+static forceinline u32
-+ilog2_ceil(size_t n)
-+{
-+ if (n <= 1)
-+ return 0;
-+ return 1 + bsrw(n - 1);
-+}
-+
-+/* ========================================================================== */
-+/* Aligned memory allocation */
-+/* ========================================================================== */
-+
-+static forceinline void *
-+aligned_malloc(size_t size, size_t alignment)
-+{
-+ const uintptr_t mask = alignment - 1;
-+ char *ptr = NULL;
-+ char *raw_ptr;
-+
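-+ /* Over-allocate by the alignment mask plus one size_t, then
-+ * record the distance back to the raw pointer just before the
-+ * aligned block so that aligned_free() can recover it.
-+ */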
-+ raw_ptr = kmalloc(mask + sizeof(size_t) + size, GFP_NOFS);
-+ if (raw_ptr) {
-+ ptr = (char *)raw_ptr + sizeof(size_t);
-+ ptr = (void *)(((uintptr_t)ptr + mask) & ~mask);
-+ *((size_t *)ptr - 1) = ptr - raw_ptr;
-+ }
-+ return ptr;
-+}
-+
-+static forceinline void
-+aligned_free(void *ptr)
-+{
-+ if (ptr)
-+ kfree((char *)ptr - *((size_t *)ptr - 1));
-+}
-+
-+extern void *aligned_malloc(size_t size, size_t alignment);
-+extern void aligned_free(void *ptr);
-+
-+#endif /* _COMMON_DEFS_H */
-diff --git a/fs/ntfs3/lib/decompress_common.c b/fs/ntfs3/lib/decompress_common.c
-new file mode 100644
-index 000000000000..f6381d214f48
---- /dev/null
-+++ b/fs/ntfs3/lib/decompress_common.c
-@@ -0,0 +1,314 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * decompress_common.c
-+ *
-+ * Code for decompression shared among multiple compression formats.
-+ *
-+ * The following copying information applies to this specific source code file:
-+ *
-+ * Written in 2012-2016 by Eric Biggers <ebiggers3@gmail.com>
-+ *
-+ * To the extent possible under law, the author(s) have dedicated all copyright
-+ * and related and neighboring rights to this software to the public domain
-+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
-+ * Dedication (the "CC0").
-+ *
-+ * This software is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
-+ *
-+ * You should have received a copy of the CC0 along with this software; if not
-+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
-+ */
-+
-+#include "decompress_common.h"
-+
-+/*
-+ * make_huffman_decode_table() -
-+ *
-+ * Given an alphabet of symbols and the length of each symbol's codeword in a
-+ * canonical prefix code, build a table for quickly decoding symbols that were
-+ * encoded with that code.
-+ *
-+ * A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols
-+ * such that no whole codeword is a prefix of any other. A prefix code might be
-+ * a _Huffman code_, which means that it is an optimum prefix code for a given
-+ * list of symbol frequencies and was generated by the Huffman algorithm.
-+ * Although the prefix codes processed here will ordinarily be "Huffman codes",
-+ * strictly speaking the decoder cannot know whether a given code was actually
-+ * generated by the Huffman algorithm or not.
-+ *
-+ * A prefix code is _canonical_ if and only if a longer codeword never
-+ * lexicographically precedes a shorter codeword, and the lexicographic ordering
-+ * of codewords of equal length is the same as the lexicographic ordering of the
-+ * corresponding symbols. The advantage of using a canonical prefix code is
-+ * that the codewords can be reconstructed from only the symbol => codeword
-+ * length mapping. This eliminates the need to transmit the codewords
-+ * explicitly. Instead, they can be enumerated in lexicographic order after
-+ * sorting the symbols primarily by increasing codeword length and secondarily
-+ * by increasing symbol value.
-+ *
-+ * However, the decoder's real goal is to decode symbols with the code, not just
-+ * generate the list of codewords. Consequently, this function directly builds
-+ * a table for efficiently decoding symbols using the code. The basic idea is
-+ * that given the next 'max_codeword_len' bits of input, the decoder can look up
-+ * the next decoded symbol by indexing a table containing '2^max_codeword_len'
-+ * entries. A codeword with length 'max_codeword_len' will have exactly one
-+ * entry in this table, whereas a codeword shorter than 'max_codeword_len' will
-+ * have multiple entries in this table. Precisely, a codeword of length 'n'
-+ * will have '2^(max_codeword_len - n)' entries. The index of each such entry,
-+ * considered as a bitstring of length 'max_codeword_len', will contain the
-+ * corresponding codeword as a prefix.
-+ *
-+ * That's the basic idea, but we extend it in two ways:
-+ *
-+ * - Often the maximum codeword length is too long for it to be efficient to
-+ * build the full decode table whenever a new code is used. Instead, we build
-+ * a "root" table using only '2^table_bits' entries, where 'table_bits <=
-+ * max_codeword_len'. Then, a lookup of 'table_bits' bits produces either a
-+ * symbol directly (for codewords not longer than 'table_bits'), or the index
-+ * of a subtable which must be indexed with additional bits of input to fully
-+ * decode the symbol (for codewords longer than 'table_bits').
-+ *
-+ * - Whenever the decoder decodes a symbol, it needs to know the codeword length
-+ * so that it can remove the appropriate number of input bits. The obvious
-+ * solution would be to simply retain the codeword lengths array and use the
-+ * decoded symbol as an index into it. However, that would require two array
-+ * accesses when decoding each symbol. Our strategy is to instead store the
-+ * codeword length directly in the decode table entry along with the symbol.
-+ *
-+ * See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table
-+ * entries, and see read_huffsym() for full details on how symbols are decoded.
-+ *
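-+ * For example, for symbols {A, B, C, D} with codeword lengths {2, 1, 3, 3},
-+ * sorting primarily by length and secondarily by symbol gives B, A, C, D,
-+ * and the canonical codewords are B=0, A=10, C=110, D=111. With
-+ * 'table_bits' == 'max_codeword_len' == 3, the root table has 8 entries:
-+ * indices 000-011 all decode to B, 100-101 decode to A, and 110 and 111
-+ * decode to C and D respectively, with no subtables required.
-+ *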
-+ * @decode_table:
-+ * The array in which to build the decode table. This must have been
-+ * declared by the DECODE_TABLE() macro. This may alias @lens, since all
-+ * @lens are consumed before the decode table is written to.
-+ *
-+ * @num_syms:
-+ * The number of symbols in the alphabet.
-+ *
-+ * @table_bits:
-+ * The log base 2 of the number of entries in the root table.
-+ *
-+ * @lens:
-+ * An array of length @num_syms, indexed by symbol, that gives the length
-+ * of the codeword, in bits, for each symbol. The length can be 0, which
-+ * means that the symbol does not have a codeword assigned. In addition,
-+ * @lens may alias @decode_table, as noted above.
-+ *
-+ * @max_codeword_len:
-+ * The maximum codeword length permitted for this code. All entries in
-+ * 'lens' must be less than or equal to this value.
-+ *
-+ * @working_space
-+ * A temporary array that was declared with DECODE_TABLE_WORKING_SPACE().
-+ *
-+ * Returns 0 on success, or -1 if the lengths do not form a valid prefix code.
-+ */
-+int
-+make_huffman_decode_table(u16 decode_table[], u32 num_syms,
-+ u32 table_bits, const u8 lens[],
-+ u32 max_codeword_len, u16 working_space[])
-+{
-+ u16 * const len_counts = &working_space[0];
-+ u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
-+ u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
-+ s32 remainder = 1;
-+ void *entry_ptr = decode_table;
-+ u32 codeword_len = 1;
-+ u32 sym_idx;
-+ u32 codeword;
-+ u32 subtable_pos;
-+ u32 subtable_bits;
-+ u32 subtable_prefix;
-+ u32 len;
-+ u32 sym;
-+ u32 stores_per_loop;
-+
-+ /* Count how many codewords have each length, including 0. */
-+ for (len = 0; len <= max_codeword_len; len++)
-+ len_counts[len] = 0;
-+ for (sym = 0; sym < num_syms; sym++)
-+ len_counts[lens[sym]]++;
-+
-+ /* It is already guaranteed that all lengths are <= max_codeword_len,
-+ * but it cannot be assumed they form a complete prefix code. A
-+ * codeword of length n should require a proportion of the codespace
-+ * equaling (1/2)^n. The code is complete if and only if, by this
-+ * measure, the codespace is exactly filled by the lengths.
-+ */
-+ for (len = 1; len <= max_codeword_len; len++) {
-+ remainder = (remainder << 1) - len_counts[len];
-+ /* Do the lengths overflow the codespace? */
-+ if (unlikely(remainder < 0))
-+ return -1;
-+ }
-+
-+ if (remainder != 0) {
-+ /* The lengths do not fill the codespace; that is, they form an
-+ * incomplete code. This is permitted only if the code is empty
-+ * (contains no symbols).
-+ */
-+
-+ if (unlikely(remainder != 1U << max_codeword_len))
-+ return -1;
-+
-+ /* The code is empty. When processing a well-formed stream, the
-+ * decode table need not be initialized in this case. However,
-+ * we cannot assume the stream is well-formed, so we must
-+ * initialize the decode table anyway. Setting all entries to 0
-+ * makes the decode table always produce symbol '0' without
-+ * consuming any bits, which is good enough.
-+ */
-+ memset(decode_table, 0, sizeof(decode_table[0]) << table_bits);
-+ return 0;
-+ }
-+
-+ /* Sort the symbols primarily by increasing codeword length and
-+ * secondarily by increasing symbol value.
-+ */
-+
-+ /* Initialize 'offsets' so that 'offsets[len]' is the number of
-+ * codewords shorter than 'len' bits, including length 0.
-+ */
-+ offsets[0] = 0;
-+ for (len = 0; len < max_codeword_len; len++)
-+ offsets[len + 1] = offsets[len] + len_counts[len];
-+
-+ /* Use the 'offsets' array to sort the symbols. */
-+ for (sym = 0; sym < num_syms; sym++)
-+ sorted_syms[offsets[lens[sym]]++] = sym;
-+
-+ /*
-+ * Fill the root table entries for codewords no longer than table_bits.
-+ *
-+ * The table will start with entries for the shortest codeword(s), which
-+ * will have the most entries. From there, the number of entries per
-+ * codeword will decrease. As an optimization, we may begin filling
-+ * entries with SSE2 vector accesses (8 entries/store), then change to
-+ * word accesses (2 or 4 entries/store), then change to 16-bit accesses
-+ * (1 entry/store).
-+ */
-+ sym_idx = offsets[0];
-+
-+ /* Fill entries one word (2 or 4 entries) at a time. */
-+ for (stores_per_loop = (1U << (table_bits - codeword_len)) /
-+ (WORDBYTES / sizeof(decode_table[0]));
-+ stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1){
-+ u32 end_sym_idx = sym_idx + len_counts[codeword_len];
-+
-+ for (; sym_idx < end_sym_idx; sym_idx++) {
-+ /* Accessing the array of u16 as u32 or u64 would
-+ * violate strict aliasing and would require compiling
-+ * the code with -fno-strict-aliasing to guarantee
-+ * correctness. To work around this problem, use the
-+ * gcc 'may_alias' extension.
-+ */
-+ size_t v = repeat_u16(
-+ MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
-+ codeword_len));
-+ u32 n = stores_per_loop;
-+
-+ do {
-+ *(size_t __attribute__((may_alias)) *)entry_ptr = v;
-+ entry_ptr += sizeof(v);
-+ } while (--n);
-+ }
-+ }
-+
-+ /* Fill entries one at a time. */
-+ for (stores_per_loop = (1U << (table_bits - codeword_len));
-+ stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1){
-+ u32 end_sym_idx = sym_idx + len_counts[codeword_len];
-+
-+ for (; sym_idx < end_sym_idx; sym_idx++) {
-+ u16 v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
-+ codeword_len);
-+ u32 n = stores_per_loop;
-+
-+ do {
-+ *(u16 *)entry_ptr = v;
-+ entry_ptr += sizeof(v);
-+ } while (--n);
-+ }
-+ }
-+
-+ /* If all symbols were processed, then no subtables are required. */
-+ if (sym_idx == num_syms)
-+ return 0;
-+
-+ /* At least one subtable is required. Process the remaining symbols. */
-+ codeword = ((u16 *)entry_ptr - decode_table) << 1;
-+ subtable_pos = 1U << table_bits;
-+ subtable_bits = table_bits;
-+ subtable_prefix = -1;
-+ do {
-+ u32 prefix;
-+ u16 entry;
-+ u32 n;
-+
-+ while (len_counts[codeword_len] == 0) {
-+ codeword_len++;
-+ codeword <<= 1;
-+ }
-+
-+ prefix = codeword >> (codeword_len - table_bits);
-+
-+ /* Start a new subtable if the first 'table_bits' bits of the
-+ * codeword don't match the prefix for the previous subtable, or
-+ * if this will be the first subtable.
-+ */
-+ if (prefix != subtable_prefix) {
-+
-+ subtable_prefix = prefix;
-+
-+ /*
-+ * Calculate the subtable length. If the codeword
-+ * length exceeds 'table_bits' by n, then the subtable
-+ * needs at least 2^n entries. But it may need more; if
-+ * there are fewer than 2^n codewords of length
-+ * 'table_bits + n' remaining, then n will need to be
-+ * incremented to bring in longer codewords until the
-+ * subtable can be filled completely. Note that it
-+ * always will, eventually, be possible to fill the
-+ * subtable, since it was previously verified that the
-+ * code is complete.
-+ */
-+ subtable_bits = codeword_len - table_bits;
-+ remainder = (s32)1 << subtable_bits;
-+ for (;;) {
-+ remainder -= len_counts[table_bits +
-+ subtable_bits];
-+ if (remainder <= 0)
-+ break;
-+ subtable_bits++;
-+ remainder <<= 1;
-+ }
-+
-+ /* Create the entry that points from the root table to
-+ * the subtable. This entry contains the index of the
-+ * start of the subtable and the number of bits with
-+ * which the subtable is indexed (the log base 2 of the
-+ * number of entries it contains).
-+ */
-+ decode_table[subtable_prefix] =
-+ MAKE_DECODE_TABLE_ENTRY(subtable_pos,
-+ subtable_bits);
-+ }
-+
-+ /* Fill the subtable entries for this symbol. */
-+ entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
-+ codeword_len - table_bits);
-+ n = 1U << (subtable_bits - (codeword_len -
-+ table_bits));
-+ do {
-+ decode_table[subtable_pos++] = entry;
-+ } while (--n);
-+
-+ len_counts[codeword_len]--;
-+ codeword++;
-+ } while (++sym_idx < num_syms);
-+
-+ return 0;
-+}
-diff --git a/fs/ntfs3/lib/decompress_common.h b/fs/ntfs3/lib/decompress_common.h
-new file mode 100644
-index 000000000000..11f644687395
---- /dev/null
-+++ b/fs/ntfs3/lib/decompress_common.h
-@@ -0,0 +1,558 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+
-+/*
-+ * decompress_common.h
-+ *
-+ * Header for decompression code shared by multiple compression formats.
-+ *
-+ * The following copying information applies to this specific source code file:
-+ *
-+ * Written in 2012-2016 by Eric Biggers <ebiggers3@gmail.com>
-+ *
-+ * To the extent possible under law, the author(s) have dedicated all copyright
-+ * and related and neighboring rights to this software to the public domain
-+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
-+ * Dedication (the "CC0").
-+ *
-+ * This software is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
-+ *
-+ * You should have received a copy of the CC0 along with this software; if not
-+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
-+ */
-+
-+#ifndef _DECOMPRESS_COMMON_H
-+#define _DECOMPRESS_COMMON_H
-+
-+#include "common_defs.h"
-+
-+/******************************************************************************/
-+/* Input bitstream for XPRESS and LZX */
-+/*----------------------------------------------------------------------------*/
-+
-+/* Structure that encapsulates a block of in-memory data being interpreted as a
-+ * stream of bits, optionally with interwoven literal bytes. Bits are assumed
-+ * to be stored in little endian 16-bit coding units, with the bits ordered high
-+ * to low.
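-+ *
-+ * For example, the bytes 0x34 0x12 form the 16-bit coding unit 0x1234,
-+ * whose bits are consumed starting from bit 15 (the most significant).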
-+ */
-+struct input_bitstream {
-+
-+ /* Bits that have been read from the input buffer. The bits are
-+ * left-justified; the next bit is always bit 31.
-+ */
-+ u32 bitbuf;
-+
-+ /* Number of bits currently held in @bitbuf. */
-+ u32 bitsleft;
-+
-+ /* Pointer to the next byte to be retrieved from the input buffer. */
-+ const u8 *next;
-+
-+ /* Pointer past the end of the input buffer. */
-+ const u8 *end;
-+};
-+
-+/* Initialize a bitstream to read from the specified input buffer. */
-+static forceinline void
-+init_input_bitstream(struct input_bitstream *is, const void *buffer, u32 size)
-+{
-+ is->bitbuf = 0;
-+ is->bitsleft = 0;
-+ is->next = buffer;
-+ is->end = is->next + size;
-+}
-+
-+/* Note: for performance reasons, the following methods don't return error codes
-+ * to the caller if the input buffer is overrun. Instead, they just assume that
-+ * all overrun data is zeroes. This has no effect on well-formed compressed
-+ * data. The only disadvantage is that bad compressed data may go undetected,
-+ * but even this is irrelevant if higher level code checksums the uncompressed
-+ * data anyway.
-+ */
-+
-+/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
-+ * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
-+ * may be called on the bitstream to peek or remove up to @num_bits bits.
-+ */
-+static forceinline void
-+bitstream_ensure_bits(struct input_bitstream *is, const u32 num_bits)
-+{
-+ /* This currently works for at most 17 bits. */
-+
-+ if (is->bitsleft >= num_bits)
-+ return;
-+
-+ if (unlikely(is->end - is->next < 2))
-+ goto overflow;
-+
-+ is->bitbuf |= (u32)get_unaligned_le16(is->next) << (16 - is->bitsleft);
-+ is->next += 2;
-+ is->bitsleft += 16;
-+
-+ if (unlikely(num_bits == 17 && is->bitsleft == 16)) {
-+ if (unlikely(is->end - is->next < 2))
-+ goto overflow;
-+
-+ is->bitbuf |= (u32)get_unaligned_le16(is->next);
-+ is->next += 2;
-+ is->bitsleft = 32;
-+ }
-+
-+ return;
-+
-+overflow:
-+ is->bitsleft = 32;
-+}
-+
-+/* Return the next @num_bits bits from the bitstream, without removing them.
-+ * There must be at least @num_bits remaining in the buffer variable, from a
-+ * previous call to bitstream_ensure_bits().
-+ */
-+static forceinline u32
-+bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
-+{
-+ return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
-+}
-+
-+/* Remove @num_bits from the bitstream. There must be at least @num_bits
-+ * remaining in the buffer variable, from a previous call to
-+ * bitstream_ensure_bits().
-+ */
-+static forceinline void
-+bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
-+{
-+ is->bitbuf <<= num_bits;
-+ is->bitsleft -= num_bits;
-+}
-+
-+/* Remove and return @num_bits bits from the bitstream. There must be at least
-+ * @num_bits remaining in the buffer variable, from a previous call to
-+ * bitstream_ensure_bits().
-+ */
-+static forceinline u32
-+bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
-+{
-+ u32 bits = bitstream_peek_bits(is, num_bits);
-+
-+ bitstream_remove_bits(is, num_bits);
-+ return bits;
-+}
-+
-+/* Read and return the next @num_bits bits from the bitstream. */
-+static forceinline u32
-+bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
-+{
-+ bitstream_ensure_bits(is, num_bits);
-+ return bitstream_pop_bits(is, num_bits);
-+}
-+
-+/* Read and return the next literal byte embedded in the bitstream. */
-+static forceinline u8
-+bitstream_read_byte(struct input_bitstream *is)
-+{
-+ if (unlikely(is->end == is->next))
-+ return 0;
-+ return *is->next++;
-+}
-+
-+/* Read and return the next 16-bit integer embedded in the bitstream. */
-+static forceinline u16
-+bitstream_read_u16(struct input_bitstream *is)
-+{
-+ u16 v;
-+
-+ if (unlikely(is->end - is->next < 2))
-+ return 0;
-+ v = get_unaligned_le16(is->next);
-+ is->next += 2;
-+ return v;
-+}
-+
-+/* Read and return the next 32-bit integer embedded in the bitstream. */
-+static forceinline u32
-+bitstream_read_u32(struct input_bitstream *is)
-+{
-+ u32 v;
-+
-+ if (unlikely(is->end - is->next < 4))
-+ return 0;
-+ v = get_unaligned_le32(is->next);
-+ is->next += 4;
-+ return v;
-+}
-+
-+/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
-+ * Return 0 if there were enough bytes remaining in the input, otherwise -1.
-+ */
-+static forceinline int
-+bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count)
-+{
-+ if (unlikely(is->end - is->next < count))
-+ return -1;
-+ memcpy(dst_buffer, is->next, count);
-+ is->next += count;
-+ return 0;
-+}
-+
-+/* Align the input bitstream on a coding-unit boundary. */
-+static forceinline void
-+bitstream_align(struct input_bitstream *is)
-+{
-+ is->bitsleft = 0;
-+ is->bitbuf = 0;
-+}
-+
-+/******************************************************************************/
-+/* Huffman decoding */
-+/*----------------------------------------------------------------------------*/
-+
-+/*
-+ * Required alignment for the Huffman decode tables. We require this alignment
-+ * so that we can fill the entries with vector or word instructions and not have
-+ * to deal with misaligned buffers.
-+ */
-+#define DECODE_TABLE_ALIGNMENT 16
-+
-+/*
-+ * Each decode table entry is 16 bits divided into two fields: 'symbol' (high 12
-+ * bits) and 'length' (low 4 bits). The precise meaning of these fields depends
-+ * on the type of entry:
-+ *
-+ * Root table entries which are *not* subtable pointers:
-+ * symbol: symbol to decode
-+ * length: codeword length in bits
-+ *
-+ * Root table entries which are subtable pointers:
-+ * symbol: index of start of subtable
-+ * length: number of bits with which the subtable is indexed
-+ *
-+ * Subtable entries:
-+ * symbol: symbol to decode
-+ * length: codeword length in bits, minus the number of bits with which the
-+ * root table is indexed
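-+ *
-+ * For example, MAKE_DECODE_TABLE_ENTRY(5, 3) evaluates to (5 << 4) | 3,
-+ * i.e. 0x53: symbol 5 with a 3-bit codeword length.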
-+ */
-+#define DECODE_TABLE_SYMBOL_SHIFT 4
-+#define DECODE_TABLE_MAX_SYMBOL ((1 << (16 - DECODE_TABLE_SYMBOL_SHIFT)) - 1)
-+#define DECODE_TABLE_MAX_LENGTH ((1 << DECODE_TABLE_SYMBOL_SHIFT) - 1)
-+#define DECODE_TABLE_LENGTH_MASK DECODE_TABLE_MAX_LENGTH
-+#define MAKE_DECODE_TABLE_ENTRY(symbol, length) \
-+ (((symbol) << DECODE_TABLE_SYMBOL_SHIFT) | (length))
-+
-+/*
-+ * Read and return the next Huffman-encoded symbol from the given bitstream
-+ * using the given decode table.
-+ *
-+ * If the input data is exhausted, then the Huffman symbol will be decoded as if
-+ * the missing bits were all zeroes.
-+ *
-+ * XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in
-+ * lzms_decompress.c; keep them in sync!
-+ */
-+static forceinline u32
-+read_huffsym(struct input_bitstream *is, const u16 decode_table[],
-+ u32 table_bits, u32 max_codeword_len)
-+{
-+ u32 entry;
-+ u32 symbol;
-+ u32 length;
-+
-+ /* Preload the bitbuffer with 'max_codeword_len' bits so that we're
-+ * guaranteed to be able to fully decode a codeword.
-+ */
-+ bitstream_ensure_bits(is, max_codeword_len);
-+
-+ /* Index the root table by the next 'table_bits' bits of input. */
-+ entry = decode_table[bitstream_peek_bits(is, table_bits)];
-+
-+ /* Extract the "symbol" and "length" from the entry. */
-+ symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
-+ length = entry & DECODE_TABLE_LENGTH_MASK;
-+
-+ /* If the root table is indexed by the full 'max_codeword_len' bits,
-+ * then there cannot be any subtables, and this will be known at compile
-+ * time. Otherwise, we must check whether the decoded symbol is really
-+ * a subtable pointer. If so, we must discard the bits with which the
-+ * root table was indexed, then index the subtable by the next 'length'
-+ * bits of input to get the real entry.
-+ */
-+ if (max_codeword_len > table_bits &&
-+ entry >= (1U << (table_bits + DECODE_TABLE_SYMBOL_SHIFT))) {
-+ /* Subtable required */
-+ bitstream_remove_bits(is, table_bits);
-+ entry = decode_table[symbol + bitstream_peek_bits(is, length)];
-+ symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
-+ length = entry & DECODE_TABLE_LENGTH_MASK;
-+ }
-+
-+ /* Discard the bits (or the remaining bits, if a subtable was required)
-+ * of the codeword.
-+ */
-+ bitstream_remove_bits(is, length);
-+
-+ /* Return the decoded symbol. */
-+ return symbol;
-+}
-+
-+/*
-+ * The DECODE_TABLE_ENOUGH() macro evaluates to the maximum number of decode
-+ * table entries, including all subtable entries, that may be required for
-+ * decoding a given Huffman code. This depends on three parameters:
-+ *
-+ * num_syms: the maximum number of symbols in the code
-+ * table_bits: the number of bits with which the root table will be indexed
-+ * max_codeword_len: the maximum allowed codeword length in the code
-+ *
-+ * Given these parameters, the utility program 'enough' from zlib, when passed
-+ * the three arguments 'num_syms', 'table_bits', and 'max_codeword_len', will
-+ * compute the maximum number of entries required. This has already been done
-+ * for the combinations we need and incorporated into the macro below so that
-+ * the mapping can be done at compilation time. If an unknown combination is
-+ * used, then a compilation error will result. To fix this, use 'enough' to
-+ * find the missing value and add it below. If that still doesn't fix the
-+ * compilation error, then most likely a constraint would be violated by the
-+ * requested parameters, so they cannot be used, at least without other changes
-+ * to the decode table --- see DECODE_TABLE_SIZE().
-+ */
-+#define DECODE_TABLE_ENOUGH(num_syms, table_bits, max_codeword_len) ( \
-+ ((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 15) ? 128 : \
-+ ((num_syms) == 8 && (table_bits) == 5 && (max_codeword_len) == 7) ? 36 : \
-+ ((num_syms) == 8 && (table_bits) == 6 && (max_codeword_len) == 7) ? 66 : \
-+ ((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 7) ? 128 : \
-+ ((num_syms) == 20 && (table_bits) == 5 && (max_codeword_len) == 15) ? 1062 : \
-+ ((num_syms) == 20 && (table_bits) == 6 && (max_codeword_len) == 15) ? 582 : \
-+ ((num_syms) == 20 && (table_bits) == 7 && (max_codeword_len) == 15) ? 390 : \
-+ ((num_syms) == 54 && (table_bits) == 9 && (max_codeword_len) == 15) ? 618 : \
-+ ((num_syms) == 54 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1098 : \
-+ ((num_syms) == 249 && (table_bits) == 9 && (max_codeword_len) == 16) ? 878 : \
-+ ((num_syms) == 249 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1326 : \
-+ ((num_syms) == 249 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2318 : \
-+ ((num_syms) == 256 && (table_bits) == 9 && (max_codeword_len) == 15) ? 822 : \
-+ ((num_syms) == 256 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1302 : \
-+ ((num_syms) == 256 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2310 : \
-+ ((num_syms) == 512 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1558 : \
-+ ((num_syms) == 512 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2566 : \
-+ ((num_syms) == 512 && (table_bits) == 12 && (max_codeword_len) == 15) ? 4606 : \
-+ ((num_syms) == 656 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1734 : \
-+ ((num_syms) == 656 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2726 : \
-+ ((num_syms) == 656 && (table_bits) == 12 && (max_codeword_len) == 16) ? 4758 : \
-+ ((num_syms) == 799 && (table_bits) == 9 && (max_codeword_len) == 15) ? 1366 : \
-+ ((num_syms) == 799 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1846 : \
-+ ((num_syms) == 799 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2854 : \
-+ -1)
-+
-+/* Wrapper around DECODE_TABLE_ENOUGH() that does additional compile-time
-+ * validation.
-+ */
-+#define DECODE_TABLE_SIZE(num_syms, table_bits, max_codeword_len) ( \
-+ \
-+ /* All values must be positive. */ \
-+ STATIC_ASSERT_ZERO((num_syms) > 0) + \
-+ STATIC_ASSERT_ZERO((table_bits) > 0) + \
-+ STATIC_ASSERT_ZERO((max_codeword_len) > 0) + \
-+ \
-+ /* There cannot be more symbols than possible codewords. */ \
-+ STATIC_ASSERT_ZERO((num_syms) <= 1U << (max_codeword_len)) + \
-+ \
-+ /* There is no reason for the root table to be indexed with */ \
-+ /* more bits than the maximum codeword length. */ \
-+ STATIC_ASSERT_ZERO((table_bits) <= (max_codeword_len)) + \
-+ \
-+ /* The maximum symbol value must fit in the 'symbol' field. */ \
-+ STATIC_ASSERT_ZERO((num_syms) - 1 <= DECODE_TABLE_MAX_SYMBOL) + \
-+ \
-+ /* The maximum codeword length in the root table must fit in */ \
-+ /* the 'length' field. */ \
-+ STATIC_ASSERT_ZERO((table_bits) <= DECODE_TABLE_MAX_LENGTH) + \
-+ \
-+ /* The maximum codeword length in a subtable must fit in the */ \
-+ /* 'length' field. */ \
-+ STATIC_ASSERT_ZERO((max_codeword_len) - (table_bits) <= \
-+ DECODE_TABLE_MAX_LENGTH) + \
-+ \
-+ /* The minimum subtable index must be greater than the maximum */\
-+ /* symbol value. If this were not the case, then there would */\
-+ /* be no way to tell whether a given root table entry is a */ \
-+ /* "subtable pointer" or not. (An alternate solution would */ \
-+ /* be to reserve a flag bit specifically for this purpose.) */ \
-+ STATIC_ASSERT_ZERO((1U << (table_bits)) > (num_syms) - 1) + \
-+ \
-+ /* The needed 'enough' value must have been defined. */ \
-+ STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH( \
-+ (num_syms), (table_bits), \
-+ (max_codeword_len)) > 0) + \
-+ \
-+ /* The maximum subtable index must fit in the 'symbol' field. */\
-+ STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH( \
-+ (num_syms), (table_bits), \
-+ (max_codeword_len)) - 1 <= \
-+ DECODE_TABLE_MAX_SYMBOL) + \
-+ \
-+ /* Finally, make the macro evaluate to the needed maximum */ \
-+ /* number of decode table entries. */ \
-+ DECODE_TABLE_ENOUGH((num_syms), (table_bits), \
-+ (max_codeword_len)) \
-+)
-+
-+
-+/*
-+ * Declare the decode table for a Huffman code, given several compile-time
-+ * constants that describe the code. See DECODE_TABLE_ENOUGH() for details.
-+ *
-+ * Decode tables must be aligned to a DECODE_TABLE_ALIGNMENT-byte boundary.
-+ * This implies that if a decode table is nested inside a dynamically allocated
-+ * structure, then the outer structure must be allocated on a
-+ * DECODE_TABLE_ALIGNMENT-byte aligned boundary as well.
-+ */
-+#define DECODE_TABLE(name, num_syms, table_bits, max_codeword_len) \
-+ u16 name[DECODE_TABLE_SIZE((num_syms), (table_bits), \
-+ (max_codeword_len))] \
-+ _aligned_attribute(DECODE_TABLE_ALIGNMENT)
-+
-+/*
-+ * Declare the temporary "working_space" array needed for building the decode
-+ * table for a Huffman code.
-+ */
-+#define DECODE_TABLE_WORKING_SPACE(name, num_syms, max_codeword_len) \
-+ u16 name[2 * ((max_codeword_len) + 1) + (num_syms)]
-+
-+extern int
-+make_huffman_decode_table(u16 decode_table[], u32 num_syms,
-+ u32 table_bits, const u8 lens[],
-+ u32 max_codeword_len, u16 working_space[]);
-+
-+/******************************************************************************/
-+/* LZ match copying */
-+/*----------------------------------------------------------------------------*/
-+
-+static forceinline void
-+copy_word_unaligned(const void *src, void *dst)
-+{
-+ store_word_unaligned(load_word_unaligned(src), dst);
-+}
-+
-+static forceinline size_t
-+repeat_u16(u16 b)
-+{
-+ size_t v = b;
-+
-+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
-+ v |= v << 16;
-+ v |= v << ((WORDBITS == 64) ? 32 : 0);
-+ return v;
-+}
-+
-+static forceinline size_t
-+repeat_byte(u8 b)
-+{
-+ return repeat_u16(((u16)b << 8) | b);
-+}
-+
-+/*
-+ * Copy an LZ77 match of 'length' bytes from the match source at 'out_next -
-+ * offset' to the match destination at 'out_next'. The source and destination
-+ * may overlap.
-+ *
-+ * This handles validating the length and offset. It is validated that the
-+ * beginning of the match source is '>= out_begin' and that end of the match
-+ * destination is '<= out_end'. The return value is 0 if the match was valid
-+ * (and was copied), otherwise -1.
-+ *
-+ * 'min_length' is a hint which specifies the minimum possible match length.
-+ * This should be a compile-time constant.
-+ */
-+static forceinline int
-+lz_copy(u32 length, u32 offset, u8 *out_begin, u8 *out_next, u8 *out_end,
-+ u32 min_length)
-+{
-+ const u8 *src;
-+ u8 *end;
-+
-+ /* Validate the offset. */
-+ if (unlikely(offset > out_next - out_begin))
-+ return -1;
-+
-+ /*
-+ * Fast path: copy a match which is no longer than a few words, is not
-+ * overlapped such that copying a word at a time would produce incorrect
-+ * results, and is not too close to the end of the buffer. Note that
-+ * this might copy more than the length of the match, but that's okay in
-+ * this scenario.
-+ */
-+ src = out_next - offset;
-+ if (UNALIGNED_ACCESS_IS_FAST && length <= 3 * WORDBYTES &&
-+ offset >= WORDBYTES && out_end - out_next >= 3 * WORDBYTES) {
-+ copy_word_unaligned(src + WORDBYTES*0, out_next + WORDBYTES*0);
-+ copy_word_unaligned(src + WORDBYTES*1, out_next + WORDBYTES*1);
-+ copy_word_unaligned(src + WORDBYTES*2, out_next + WORDBYTES*2);
-+ return 0;
-+ }
-+
-+ /* Validate the length. This isn't needed in the fast path above, due
-+ * to the additional conditions tested, but we do need it here.
-+ */
-+ if (unlikely(length > out_end - out_next))
-+ return -1;
-+ end = out_next + length;
-+
-+ /*
-+ * Try to copy one word at a time. On i386 and x86_64 this is faster
-+ * than copying one byte at a time, unless the data is near-random and
-+ * all the matches have very short lengths. Note that since this
-+ * requires unaligned memory accesses, it won't necessarily be faster on
-+ * every architecture.
-+ *
-+ * Also note that we might copy more than the length of the match. For
-+ * example, if a word is 8 bytes and the match is of length 5, then
-+ * we'll simply copy 8 bytes. This is okay as long as we don't write
-+ * beyond the end of the output buffer, hence the check for (out_end -
-+ * end >= WORDBYTES - 1).
-+ */
-+ if (UNALIGNED_ACCESS_IS_FAST && likely(out_end - end >= WORDBYTES - 1)) {
-+ if (offset >= WORDBYTES) {
-+ /* The source and destination words don't overlap. */
-+ do {
-+ copy_word_unaligned(src, out_next);
-+ src += WORDBYTES;
-+ out_next += WORDBYTES;
-+ } while (out_next < end);
-+ return 0;
-+ } else if (offset == 1) {
-+ /* Offset 1 matches are equivalent to run-length
-+ * encoding of the previous byte. This case is common
-+ * if the data contains many repeated bytes.
-+ */
-+ size_t v = repeat_byte(*(out_next - 1));
-+
-+ do {
-+ store_word_unaligned(v, out_next);
-+ src += WORDBYTES;
-+ out_next += WORDBYTES;
-+ } while (out_next < end);
-+ return 0;
-+ }
-+ /*
-+ * We don't bother with special cases for other 'offset <
-+ * WORDBYTES', which are usually rarer than 'offset == 1'.
-+ * Extra checks will just slow things down. Actually, it's
-+ * possible to handle all the 'offset < WORDBYTES' cases using
-+ * the same code, but it still becomes more complicated and doesn't
-+ * seem any faster overall; it definitely slows down the more
-+ * common 'offset == 1' case.
-+ */
-+ }
-+
-+ /* Fall back to a bytewise copy. */
-+ if (min_length >= 2)
-+ *out_next++ = *src++;
-+ if (min_length >= 3)
-+ *out_next++ = *src++;
-+ if (min_length >= 4)
-+ *out_next++ = *src++;
-+ do {
-+ *out_next++ = *src++;
-+ } while (out_next != end);
-+ return 0;
-+}
-+
-+#endif /* _DECOMPRESS_COMMON_H */
-diff --git a/fs/ntfs3/lib/lzx_common.c b/fs/ntfs3/lib/lzx_common.c
-new file mode 100644
-index 000000000000..d89d0fac333c
---- /dev/null
-+++ b/fs/ntfs3/lib/lzx_common.c
-@@ -0,0 +1,204 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * lzx_common.c - Common code for LZX compression and decompression.
-+ */
-+
-+/*
-+ * Copyright (C) 2012-2016 Eric Biggers
-+ *
-+ * This program is free software: you can redistribute it and/or modify it under
-+ * the terms of the GNU General Public License as published by the Free Software
-+ * Foundation, either version 2 of the License, or (at your option) any later
-+ * version.
-+ *
-+ * This program is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-+ * details.
-+ *
-+ * You should have received a copy of the GNU General Public License along with
-+ * this program. If not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+#include "lzx_common.h"
-+
-+/* Mapping: offset slot => first match offset that uses that offset slot.
-+ * The offset slots for repeat offsets map to "fake" offsets < 1.
-+ */
-+const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = {
-+ -2, -1, 0, 1, 2, /* 0 --- 4 */
-+ 4, 6, 10, 14, 22, /* 5 --- 9 */
-+ 30, 46, 62, 94, 126, /* 10 --- 14 */
-+ 190, 254, 382, 510, 766, /* 15 --- 19 */
-+ 1022, 1534, 2046, 3070, 4094, /* 20 --- 24 */
-+ 6142, 8190, 12286, 16382, 24574, /* 25 --- 29 */
-+ 32766, 49150, 65534, 98302, 131070, /* 30 --- 34 */
-+ 196606, 262142, 393214, 524286, 655358, /* 35 --- 39 */
-+ 786430, 917502, 1048574, 1179646, 1310718, /* 40 --- 44 */
-+ 1441790, 1572862, 1703934, 1835006, 1966078, /* 45 --- 49 */
-+ 2097150 /* extra */
-+};
-+
-+/* Mapping: offset slot => how many extra bits must be read and added to the
-+ * corresponding offset slot base to decode the match offset.
-+ */
-+const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = {
-+ 0, 0, 0, 0, 1,
-+ 1, 2, 2, 3, 3,
-+ 4, 4, 5, 5, 6,
-+ 6, 7, 7, 8, 8,
-+ 9, 9, 10, 10, 11,
-+ 11, 12, 12, 13, 13,
-+ 14, 14, 15, 15, 16,
-+ 16, 17, 17, 17, 17,
-+ 17, 17, 17, 17, 17,
-+ 17, 17, 17, 17, 17,
-+};
-+
-+
-+/* Round the specified buffer size up to the next valid LZX window size, and
-+ * return its order (log2). Or, if the buffer size is 0 or greater than the
-+ * largest valid LZX window size, return 0.
-+ */
-+u32
-+lzx_get_window_order(size_t max_bufsize)
-+{
-+ if (max_bufsize == 0 || max_bufsize > LZX_MAX_WINDOW_SIZE)
-+ return 0;
-+
-+ return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER);
-+}
-+
-+/* Given a valid LZX window order, return the number of symbols that will exist
-+ * in the main Huffman code.
-+ */
-+u32
-+lzx_get_num_main_syms(u32 window_order)
-+{
-+ /* Note: one would expect that the maximum match offset would be
-+ * 'window_size - LZX_MIN_MATCH_LEN', which would occur if the first two
-+ * bytes were to match the last two bytes. However, the format
-+ * disallows this case. This reduces the number of needed offset slots
-+ * by 1.
-+ */
-+ u32 window_size = (u32)1 << window_order;
-+ u32 max_offset = window_size - LZX_MIN_MATCH_LEN - 1;
-+ u32 num_offset_slots = 30;
-+
-+ while (max_offset >= lzx_offset_slot_base[num_offset_slots])
-+ num_offset_slots++;
-+
-+ return LZX_NUM_CHARS + (num_offset_slots * LZX_NUM_LEN_HEADERS);
-+}
-+
-+static void
-+do_translate_target(void *target, s32 input_pos)
-+{
-+ s32 abs_offset, rel_offset;
-+
-+ rel_offset = get_unaligned_le32(target);
-+ if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) {
-+ if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) {
-+ /* "good translation" */
-+ abs_offset = rel_offset + input_pos;
-+ } else {
-+ /* "compensating translation" */
-+ abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE;
-+ }
-+ put_unaligned_le32(abs_offset, target);
-+ }
-+}
-+
-+static void
-+undo_translate_target(void *target, s32 input_pos)
-+{
-+ s32 abs_offset, rel_offset;
-+
-+ abs_offset = get_unaligned_le32(target);
-+ if (abs_offset >= 0) {
-+ if (abs_offset < LZX_WIM_MAGIC_FILESIZE) {
-+ /* "good translation" */
-+ rel_offset = abs_offset - input_pos;
-+ put_unaligned_le32(rel_offset, target);
-+ }
-+ } else {
-+ if (abs_offset >= -input_pos) {
-+ /* "compensating translation" */
-+ rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE;
-+ put_unaligned_le32(rel_offset, target);
-+ }
-+ }
-+}
-+
-+/*
-+ * Do or undo the 'E8' preprocessing used in LZX. Before compression, the
-+ * uncompressed data is preprocessed by changing the targets of x86 CALL
-+ * instructions from relative offsets to absolute offsets. After decompression,
-+ * the translation is undone by changing the targets of x86 CALL instructions
-+ * from absolute offsets to relative offsets.
-+ *
-+ * Note that despite its intent, E8 preprocessing can be done on any data even
-+ * if it is not actually x86 machine code. In fact, E8 preprocessing appears to
-+ * always be used in LZX-compressed resources in WIM files; there is no bit to
-+ * indicate whether it is used or not, unlike in the LZX compressed format as
-+ * used in cabinet files, where a bit is reserved for that purpose.
-+ *
-+ * E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
-+ * which really means the 5-byte call instruction cannot start in the last 10
-+ * bytes of the uncompressed data. This is one of the errors in the LZX
-+ * documentation.
-+ *
-+ * E8 preprocessing does not appear to be disabled after the 32768th chunk of a
-+ * WIM resource, which apparently is another difference from the LZX compression
-+ * used in cabinet files.
-+ *
-+ * E8 processing is supposed to take the file size as a parameter, as it is used
-+ * in calculating the translated jump targets. But in WIM files, this file size
-+ * is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
-+ */
-+static void
-+lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32))
-+{
-+ /*
-+ * A worthwhile optimization is to push the end-of-buffer check into the
-+ * relatively rare E8 case. This is possible if we replace the last six
-+ * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
-+ * before reaching end-of-buffer. In addition, this scheme guarantees
-+ * that no translation can begin following an E8 byte in the last 10
-+ * bytes because a 4-byte offset containing E8 as its high byte is a
-+ * large negative number that is not valid for translation. That is
-+ * exactly what we need.
-+ */
-+ u8 *tail;
-+ u8 saved_bytes[6];
-+ u8 *p;
-+
-+ if (size <= 10)
-+ return;
-+
-+ tail = &data[size - 6];
-+ memcpy(saved_bytes, tail, 6);
-+ memset(tail, 0xE8, 6);
-+ p = data;
-+ for (;;) {
-+ while (*p != 0xE8)
-+ p++;
-+ if (p >= tail)
-+ break;
-+ (*process_target)(p + 1, p - data);
-+ p += 5;
-+ }
-+ memcpy(tail, saved_bytes, 6);
-+}
-+
-+void
-+lzx_preprocess(u8 *data, u32 size)
-+{
-+ lzx_e8_filter(data, size, do_translate_target);
-+}
-+
-+void
-+lzx_postprocess(u8 *data, u32 size)
-+{
-+ lzx_e8_filter(data, size, undo_translate_target);
-+}
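
A standalone sketch of the two translations on a single CALL target, assuming the fixed WIM file size described above; the position and offset values are illustrative, and the round trip shows that postprocessing inverts preprocessing:

#include <stdint.h>
#include <stdio.h>

#define MAGIC_FILESIZE 12000000	/* LZX_WIM_MAGIC_FILESIZE */

/* Forward translation (preprocessing): relative -> absolute. */
static int32_t translate(int32_t rel, int32_t pos)
{
	if (rel >= -pos && rel < MAGIC_FILESIZE)
		return rel < MAGIC_FILESIZE - pos ?
		       rel + pos :		/* "good translation" */
		       rel - MAGIC_FILESIZE;	/* "compensating" */
	return rel;
}

/* Backward translation (postprocessing): absolute -> relative. */
static int32_t untranslate(int32_t abs_off, int32_t pos)
{
	if (abs_off >= 0 && abs_off < MAGIC_FILESIZE)
		return abs_off - pos;		/* "good translation" */
	if (abs_off < 0 && abs_off >= -pos)
		return abs_off + MAGIC_FILESIZE; /* "compensating" */
	return abs_off;
}

int main(void)
{
	int32_t rel = -100, pos = 5000;
	int32_t abs_off = translate(rel, pos);

	/* Prints: rel -100 at pos 5000 -> abs 4900 -> rel -100 */
	printf("rel %d at pos %d -> abs %d -> rel %d\n", (int)rel, (int)pos,
	       (int)abs_off, (int)untranslate(abs_off, pos));
	return 0;
}
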
-diff --git a/fs/ntfs3/lib/lzx_common.h b/fs/ntfs3/lib/lzx_common.h
-new file mode 100644
-index 000000000000..2c87a0c9b5b3
---- /dev/null
-+++ b/fs/ntfs3/lib/lzx_common.h
-@@ -0,0 +1,31 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+
-+/*
-+ * lzx_common.h
-+ *
-+ * Declarations shared between LZX compression and decompression.
-+ */
-+
-+#ifndef _LZX_COMMON_H
-+#define _LZX_COMMON_H
-+
-+#include "lzx_constants.h"
-+#include "common_defs.h"
-+
-+extern const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];
-+
-+extern const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
-+
-+extern u32
-+lzx_get_window_order(size_t max_bufsize);
-+
-+extern u32
-+lzx_get_num_main_syms(u32 window_order);
-+
-+extern void
-+lzx_preprocess(u8 *data, u32 size);
-+
-+extern void
-+lzx_postprocess(u8 *data, u32 size);
-+
-+#endif /* _LZX_COMMON_H */
-diff --git a/fs/ntfs3/lib/lzx_constants.h b/fs/ntfs3/lib/lzx_constants.h
-new file mode 100644
-index 000000000000..1115ce8ce5b1
---- /dev/null
-+++ b/fs/ntfs3/lib/lzx_constants.h
-@@ -0,0 +1,113 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+/*
-+ * lzx_constants.h
-+ *
-+ * Constants for the LZX compression format.
-+ */
-+
-+#ifndef _LZX_CONSTANTS_H
-+#define _LZX_CONSTANTS_H
-+
-+/* Number of literal byte values. */
-+#define LZX_NUM_CHARS 256
-+
-+/* The smallest and largest allowed match lengths. */
-+#define LZX_MIN_MATCH_LEN 2
-+#define LZX_MAX_MATCH_LEN 257
-+
-+/* Number of distinct match lengths that can be represented. */
-+#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
-+
-+/* Number of match lengths for which no length symbol is required. */
-+#define LZX_NUM_PRIMARY_LENS 7
-+#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)
-+
-+/* Valid values of the 3-bit block type field. */
-+#define LZX_BLOCKTYPE_VERBATIM 1
-+#define LZX_BLOCKTYPE_ALIGNED 2
-+#define LZX_BLOCKTYPE_UNCOMPRESSED 3
-+
-+/* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum
-+ * sizes of the sliding window.
-+ */
-+#define LZX_MIN_WINDOW_ORDER 15u
-+#define LZX_MAX_WINDOW_ORDER 21
-+#define LZX_MIN_WINDOW_SIZE (1UL << LZX_MIN_WINDOW_ORDER) /* 32768 */
-+#define LZX_MAX_WINDOW_SIZE (1UL << LZX_MAX_WINDOW_ORDER) /* 2097152 */
-+
-+/* Maximum number of offset slots. (The actual number of offset slots depends
-+ * on the window size.)
-+ */
-+#define LZX_MAX_OFFSET_SLOTS 50
-+
-+/* Maximum number of symbols in the main code. (The actual number of symbols in
-+ * the main code depends on the window size.)
-+ */
-+#define LZX_MAINCODE_MAX_NUM_SYMBOLS \
-+ (LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
-+
-+/* Number of symbols in the length code. */
-+#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
-+
-+/* Number of symbols in the pre-code. */
-+#define LZX_PRECODE_NUM_SYMBOLS 20
-+
-+/* Number of bits in which each pre-code codeword length is represented. */
-+#define LZX_PRECODE_ELEMENT_SIZE 4
-+
-+/* Number of low-order bits of each match offset that are entropy-encoded in
-+ * aligned offset blocks.
-+ */
-+#define LZX_NUM_ALIGNED_OFFSET_BITS 3
-+
-+/* Number of symbols in the aligned offset code. */
-+#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)
-+
-+/* Mask for the match offset bits that are entropy-encoded in aligned offset
-+ * blocks.
-+ */
-+#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
-+
-+/* Number of bits in which each aligned offset codeword length is represented. */
-+#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3
-+
-+/* The first offset slot which requires an aligned offset symbol in aligned
-+ * offset blocks.
-+ */
-+#define LZX_MIN_ALIGNED_OFFSET_SLOT 8
-+
-+/* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT. */
-+#define LZX_MIN_ALIGNED_OFFSET 14
-+
-+/* The maximum number of extra offset bits in verbatim blocks. (One would need
-+ * to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset
-+ * bits in *aligned* blocks.)
-+ */
-+#define LZX_MAX_NUM_EXTRA_BITS 17
-+
-+/* Maximum lengths (in bits) for length-limited Huffman code construction. */
-+#define LZX_MAX_MAIN_CODEWORD_LEN 16
-+#define LZX_MAX_LEN_CODEWORD_LEN 16
-+#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
-+#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
-+
-+/* For LZX-compressed blocks in WIM resources, this value is always used as the
-+ * filesize parameter for the call instruction (0xe8 byte) preprocessing, even
-+ * though the blocks themselves are not this size, and the size of the actual
-+ * file resource in the WIM file is very likely to be something entirely
-+ * different as well.
-+ */
-+#define LZX_WIM_MAGIC_FILESIZE 12000000
-+
-+/* Assumed LZX block size when the encoded block size begins with a 0 bit.
-+ * This is probably WIM-specific.
-+ */
-+#define LZX_DEFAULT_BLOCK_SIZE 32768
-+
-+/* Number of offsets in the recent (or "repeat") offsets queue. */
-+#define LZX_NUM_RECENT_OFFSETS 3
-+
-+/* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT). */
-+#define LZX_OFFSET_ADJUSTMENT (LZX_NUM_RECENT_OFFSETS - 1)
-+
-+#endif /* _LZX_CONSTANTS_H */
-diff --git a/fs/ntfs3/lib/lzx_decompress.c b/fs/ntfs3/lib/lzx_decompress.c
-new file mode 100644
-index 000000000000..d6897a394abe
---- /dev/null
-+++ b/fs/ntfs3/lib/lzx_decompress.c
-@@ -0,0 +1,553 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * lzx_decompress.c
-+ *
-+ * A decompressor for the LZX compression format, as used in WIM files.
-+ */
-+
-+/*
-+ * Copyright (C) 2012-2016 Eric Biggers
-+ *
-+ * This program is free software: you can redistribute it and/or modify it under
-+ * the terms of the GNU General Public License as published by the Free Software
-+ * Foundation, either version 2 of the License, or (at your option) any later
-+ * version.
-+ *
-+ * This program is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-+ * details.
-+ *
-+ * You should have received a copy of the GNU General Public License along with
-+ * this program. If not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+/*
-+ * LZX is an LZ77 and Huffman-code based compression format that has many
-+ * similarities to DEFLATE (the format used by zlib/gzip). The compression
-+ * ratio is as good or better than DEFLATE. See lzx_compress.c for a format
-+ * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
-+ * historical overview. Here I make some pragmatic notes.
-+ *
-+ * The old specification for LZX is the document "Microsoft LZX Data Compression
-+ * Format" (1997). It defines the LZX format as used in cabinet files. Allowed
-+ * window sizes are 2^n where 15 <= n <= 21. However, this document contains
-+ * several errors, so don't read too much into it...
-+ *
-+ * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
-+ * Compression and Decompression" (2014). It defines the LZX format as used by
-+ * Microsoft's binary patcher. It corrects several errors in the 1997 document
-+ * and extends the format in several ways --- namely, optional reference data,
-+ * up to 2^25 byte windows, and longer match lengths.
-+ *
-+ * WIM files use a more restricted form of LZX. No LZX DELTA extensions are
-+ * present, the window is not "sliding", E8 preprocessing is done
-+ * unconditionally with a fixed file size, and the maximum window size is always
-+ * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
-+ * This code is primarily intended to implement this form of LZX. But although
-+ * not compatible with WIMGAPI, this code also supports maximum window sizes up
-+ * to 2^21 bytes.
-+ *
-+ * TODO: Add support for window sizes up to 2^25 bytes.
-+ */
-+
-+#include "decompress_common.h"
-+#include "lzx_common.h"
-+
-+/* These values are chosen for fast decompression. */
-+#define LZX_MAINCODE_TABLEBITS 11
-+#define LZX_LENCODE_TABLEBITS 9
-+#define LZX_PRECODE_TABLEBITS 6
-+#define LZX_ALIGNEDCODE_TABLEBITS 7
-+
-+#define LZX_READ_LENS_MAX_OVERRUN 50
-+
-+struct lzx_decompressor {
-+
-+ DECODE_TABLE(maincode_decode_table, LZX_MAINCODE_MAX_NUM_SYMBOLS,
-+ LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
-+ u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
-+
-+ DECODE_TABLE(lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS,
-+ LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
-+ u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
-+
-+ union {
-+ DECODE_TABLE(alignedcode_decode_table, LZX_ALIGNEDCODE_NUM_SYMBOLS,
-+ LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
-+ u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
-+ };
-+
-+ union {
-+ DECODE_TABLE(precode_decode_table, LZX_PRECODE_NUM_SYMBOLS,
-+ LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
-+ u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
-+ u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
-+ };
-+
-+ union {
-+ DECODE_TABLE_WORKING_SPACE(maincode_working_space,
-+ LZX_MAINCODE_MAX_NUM_SYMBOLS,
-+ LZX_MAX_MAIN_CODEWORD_LEN);
-+ DECODE_TABLE_WORKING_SPACE(lencode_working_space,
-+ LZX_LENCODE_NUM_SYMBOLS,
-+ LZX_MAX_LEN_CODEWORD_LEN);
-+ DECODE_TABLE_WORKING_SPACE(alignedcode_working_space,
-+ LZX_ALIGNEDCODE_NUM_SYMBOLS,
-+ LZX_MAX_ALIGNED_CODEWORD_LEN);
-+ DECODE_TABLE_WORKING_SPACE(precode_working_space,
-+ LZX_PRECODE_NUM_SYMBOLS,
-+ LZX_MAX_PRE_CODEWORD_LEN);
-+ };
-+
-+ u32 window_order;
-+ u32 num_main_syms;
-+
-+ /* Like lzx_extra_offset_bits[], but does not include the entropy-coded
-+ * bits of aligned offset blocks
-+ */
-+ u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS];
-+
-+} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
-+
-+/* Read a Huffman-encoded symbol using the precode. */
-+static forceinline u32
-+read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
-+{
-+ return read_huffsym(is, d->precode_decode_table,
-+ LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
-+}
-+
-+/* Read a Huffman-encoded symbol using the main code. */
-+static forceinline u32
-+read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
-+{
-+ return read_huffsym(is, d->maincode_decode_table,
-+ LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
-+}
-+
-+/* Read a Huffman-encoded symbol using the length code. */
-+static forceinline u32
-+read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
-+{
-+ return read_huffsym(is, d->lencode_decode_table,
-+ LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
-+}
-+
-+/* Read a Huffman-encoded symbol using the aligned offset code. */
-+static forceinline u32
-+read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
-+{
-+ return read_huffsym(is, d->alignedcode_decode_table,
-+ LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
-+}
-+
-+/*
-+ * Read a precode from the compressed input bitstream, then use it to decode
-+ * @num_lens codeword length values and write them to @lens.
-+ */
-+static int
-+lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
-+ u8 *lens, u32 num_lens)
-+{
-+ u8 *len_ptr = lens;
-+ u8 *lens_end = lens + num_lens;
-+ int i;
-+
-+ /* Read the lengths of the precode codewords. These are stored
-+ * explicitly.
-+ */
-+ for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
-+ d->precode_lens[i] =
-+ bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
-+ }
-+
-+ /* Build the decoding table for the precode. */
-+ if (make_huffman_decode_table(d->precode_decode_table,
-+ LZX_PRECODE_NUM_SYMBOLS,
-+ LZX_PRECODE_TABLEBITS,
-+ d->precode_lens,
-+ LZX_MAX_PRE_CODEWORD_LEN,
-+ d->precode_working_space))
-+ return -1;
-+
-+ /* Decode the codeword lengths. */
-+ do {
-+ u32 presym;
-+ u8 len;
-+
-+ /* Read the next precode symbol. */
-+ presym = read_presym(d, is);
-+ if (presym < 17) {
-+ /* Difference from old length */
-+ len = *len_ptr - presym;
-+ if ((s8)len < 0)
-+ len += 17;
-+ *len_ptr++ = len;
-+ } else {
-+ /* Special RLE values */
-+
-+ u32 run_len;
-+
-+ if (presym == 17) {
-+ /* Run of 0's */
-+ run_len = 4 + bitstream_read_bits(is, 4);
-+ len = 0;
-+ } else if (presym == 18) {
-+ /* Longer run of 0's */
-+ run_len = 20 + bitstream_read_bits(is, 5);
-+ len = 0;
-+ } else {
-+ /* Run of identical lengths */
-+ run_len = 4 + bitstream_read_bits(is, 1);
-+ presym = read_presym(d, is);
-+ if (unlikely(presym > 17))
-+ return -1;
-+ len = *len_ptr - presym;
-+ if ((s8)len < 0)
-+ len += 17;
-+ }
-+
-+ do {
-+ *len_ptr++ = len;
-+ } while (--run_len);
-+ /*
-+ * The worst case overrun is when presym == 18,
-+ * run_len == 20 + 31, and only 1 length was remaining.
-+ * So LZX_READ_LENS_MAX_OVERRUN == 50.
-+ *
-+ * Overrun while reading the first half of maincode_lens
-+ * can corrupt the previous values in the second half.
-+ * This doesn't really matter because the resulting
-+ * lengths will still be in range, and data that
-+ * generates overruns is invalid anyway.
-+ */
-+ }
-+ } while (len_ptr < lens_end);
-+
-+ return 0;
-+}
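
The (s8) cast above is shorthand for the delta rule new_len = (old_len - presym) mod 17; a small standalone check of that equivalence (the values are illustrative):

#include <stdio.h>

static unsigned int delta_len(unsigned int old_len, unsigned int presym)
{
	int len = (int)old_len - (int)presym;

	return len < 0 ? (unsigned int)(len + 17) : (unsigned int)len;
}

int main(void)
{
	/* old length 3, presym 5 -> (3 - 5) mod 17 = 15 */
	printf("%u\n", delta_len(3, 5));
	return 0;
}
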
-+
-+/*
-+ * Read the header of an LZX block. For all block types, the block type and
-+ * size is saved in *block_type_ret and *block_size_ret, respectively. For
-+ * compressed blocks, the codeword lengths are also saved. For uncompressed
-+ * blocks, the recent offsets queue is also updated.
-+ */
-+static int
-+lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
-+ u32 recent_offsets[], int *block_type_ret,
-+ u32 *block_size_ret)
-+{
-+ int block_type;
-+ u32 block_size;
-+ int i;
-+
-+ bitstream_ensure_bits(is, 4);
-+
-+ /* Read the block type. */
-+ block_type = bitstream_pop_bits(is, 3);
-+
-+ /* Read the block size. */
-+ if (bitstream_pop_bits(is, 1)) {
-+ block_size = LZX_DEFAULT_BLOCK_SIZE;
-+ } else {
-+ block_size = bitstream_read_bits(is, 16);
-+ if (d->window_order >= 16) {
-+ block_size <<= 8;
-+ block_size |= bitstream_read_bits(is, 8);
-+ }
-+ }
-+
-+ switch (block_type) {
-+
-+ case LZX_BLOCKTYPE_ALIGNED:
-+
-+ /* Read the aligned offset codeword lengths. */
-+
-+ for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
-+ d->alignedcode_lens[i] =
-+ bitstream_read_bits(is,
-+ LZX_ALIGNEDCODE_ELEMENT_SIZE);
-+ }
-+
-+		/* Fall through, since the rest of the header for aligned offset
-+ * blocks is the same as that for verbatim blocks.
-+ */
-+ fallthrough;
-+
-+ case LZX_BLOCKTYPE_VERBATIM:
-+
-+ /* Read the main codeword lengths, which are divided into two
-+ * parts: literal symbols and match headers.
-+ */
-+ if (lzx_read_codeword_lens(d, is, d->maincode_lens,
-+ LZX_NUM_CHARS))
-+ return -1;
-+
-+ if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
-+ d->num_main_syms - LZX_NUM_CHARS))
-+ return -1;
-+
-+
-+ /* Read the length codeword lengths. */
-+
-+ if (lzx_read_codeword_lens(d, is, d->lencode_lens,
-+ LZX_LENCODE_NUM_SYMBOLS))
-+ return -1;
-+
-+ break;
-+
-+ case LZX_BLOCKTYPE_UNCOMPRESSED:
-+ /*
-+ * The header of an uncompressed block contains new values for
-+ * the recent offsets queue, starting on the next 16-bit
-+ * boundary in the bitstream. Careful: if the stream is
-+ * *already* aligned, the correct thing to do is to throw away
-+ * the next 16 bits (this is probably a mistake in the format).
-+ */
-+ bitstream_ensure_bits(is, 1);
-+ bitstream_align(is);
-+ recent_offsets[0] = bitstream_read_u32(is);
-+ recent_offsets[1] = bitstream_read_u32(is);
-+ recent_offsets[2] = bitstream_read_u32(is);
-+
-+ /* Offsets of 0 are invalid. */
-+ if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
-+ recent_offsets[2] == 0)
-+ return -1;
-+ break;
-+
-+ default:
-+ /* Unrecognized block type. */
-+ return -1;
-+ }
-+
-+ *block_type_ret = block_type;
-+ *block_size_ret = block_size;
-+ return 0;
-+}
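
Concretely: the block type costs 3 bits and the size flag 1 bit, so with the 2^15 WIM window a block header opens with either 4 bits (flag set, 32768-byte size assumed) or 20 bits (flag clear plus an explicit 16-bit size) before any codeword lengths follow.
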
-+
-+/* Decompress a block of LZX-compressed data. */
-+static int
-+lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
-+ int block_type, u32 block_size,
-+ u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
-+{
-+ u8 * const block_end = out_next + block_size;
-+ u32 min_aligned_offset_slot;
-+
-+ /*
-+ * Build the Huffman decode tables. We always need to build the main
-+ * and length decode tables. For aligned blocks we additionally need to
-+ * build the aligned offset decode table.
-+ */
-+
-+ if (make_huffman_decode_table(d->maincode_decode_table,
-+ d->num_main_syms,
-+ LZX_MAINCODE_TABLEBITS,
-+ d->maincode_lens,
-+ LZX_MAX_MAIN_CODEWORD_LEN,
-+ d->maincode_working_space))
-+ return -1;
-+
-+ if (make_huffman_decode_table(d->lencode_decode_table,
-+ LZX_LENCODE_NUM_SYMBOLS,
-+ LZX_LENCODE_TABLEBITS,
-+ d->lencode_lens,
-+ LZX_MAX_LEN_CODEWORD_LEN,
-+ d->lencode_working_space))
-+ return -1;
-+
-+ if (block_type == LZX_BLOCKTYPE_ALIGNED) {
-+ if (make_huffman_decode_table(d->alignedcode_decode_table,
-+ LZX_ALIGNEDCODE_NUM_SYMBOLS,
-+ LZX_ALIGNEDCODE_TABLEBITS,
-+ d->alignedcode_lens,
-+ LZX_MAX_ALIGNED_CODEWORD_LEN,
-+ d->alignedcode_working_space))
-+ return -1;
-+ min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
-+ memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
-+ sizeof(lzx_extra_offset_bits));
-+ } else {
-+ min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
-+ memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
-+ sizeof(lzx_extra_offset_bits));
-+ }
-+
-+ /* Decode the literals and matches. */
-+
-+ do {
-+ u32 mainsym;
-+ u32 length;
-+ u32 offset;
-+ u32 offset_slot;
-+
-+ mainsym = read_mainsym(d, is);
-+ if (mainsym < LZX_NUM_CHARS) {
-+ /* Literal */
-+ *out_next++ = mainsym;
-+ continue;
-+ }
-+
-+ /* Match */
-+
-+ /* Decode the length header and offset slot. */
-+ STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
-+ length = mainsym % LZX_NUM_LEN_HEADERS;
-+ offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;
-+
-+ /* If needed, read a length symbol to decode the full length. */
-+ if (length == LZX_NUM_PRIMARY_LENS)
-+ length += read_lensym(d, is);
-+ length += LZX_MIN_MATCH_LEN;
-+
-+ if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
-+ /* Repeat offset */
-+
-+ /* Note: This isn't a real LRU queue, since using the R2
-+ * offset doesn't bump the R1 offset down to R2.
-+ */
-+ offset = recent_offsets[offset_slot];
-+ recent_offsets[offset_slot] = recent_offsets[0];
-+ } else {
-+ /* Explicit offset */
-+ offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
-+ if (offset_slot >= min_aligned_offset_slot) {
-+ offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
-+ read_alignedsym(d, is);
-+ }
-+ offset += lzx_offset_slot_base[offset_slot];
-+
-+ /* Update the match offset LRU queue. */
-+ STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
-+ recent_offsets[2] = recent_offsets[1];
-+ recent_offsets[1] = recent_offsets[0];
-+ }
-+ recent_offsets[0] = offset;
-+
-+ /* Validate the match and copy it to the current position. */
-+ if (unlikely(lz_copy(length, offset, out_begin,
-+ out_next, block_end, LZX_MIN_MATCH_LEN)))
-+ return -1;
-+ out_next += length;
-+ } while (out_next != block_end);
-+
-+ return 0;
-+}
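
A standalone sketch of the repeat-offset update above: reusing R1 or R2 swaps it with R0 rather than rotating the whole queue, exactly as the comment notes (the queue contents are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int r[3] = { 100, 200, 300 };	/* R0, R1, R2 */
	unsigned int offset;

	/* A match reuses slot 2 (R2): swap it with R0, as the code does. */
	offset = r[2];
	r[2] = r[0];
	r[0] = offset;

	/* Prints: R0=300 R1=200 R2=100 */
	printf("R0=%u R1=%u R2=%u\n", r[0], r[1], r[2]);
	return 0;
}
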
-+
-+int
-+lzx_decompress(struct lzx_decompressor *__restrict d,
-+ const void *__restrict compressed_data, size_t compressed_size,
-+ void *__restrict uncompressed_data, size_t uncompressed_size)
-+{
-+ u8 * const out_begin = uncompressed_data;
-+ u8 *out_next = out_begin;
-+ u8 * const out_end = out_begin + uncompressed_size;
-+ struct input_bitstream is;
-+ u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
-+ u32 may_have_e8_byte = 0;
-+
-+ STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
-+
-+ init_input_bitstream(&is, compressed_data, compressed_size);
-+
-+ /* Codeword lengths begin as all 0's for delta encoding purposes. */
-+ memset(d->maincode_lens, 0, d->num_main_syms);
-+ memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
-+
-+ /* Decompress blocks until we have all the uncompressed data. */
-+
-+ while (out_next != out_end) {
-+ int block_type;
-+ u32 block_size;
-+
-+ if (lzx_read_block_header(d, &is, recent_offsets,
-+ &block_type, &block_size))
-+ return -1;
-+
-+ if (block_size < 1 || block_size > out_end - out_next)
-+ return -1;
-+
-+ if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {
-+
-+ /* Compressed block */
-+ if (lzx_decompress_block(d, &is, block_type, block_size,
-+ out_begin, out_next,
-+ recent_offsets))
-+ return -1;
-+
-+ /* If the first E8 byte was in this block, then it must
-+ * have been encoded as a literal using mainsym E8.
-+ */
-+ may_have_e8_byte |= d->maincode_lens[0xE8];
-+ } else {
-+
-+ /* Uncompressed block */
-+ if (bitstream_read_bytes(&is, out_next, block_size))
-+ return -1;
-+
-+ /* Re-align the bitstream if needed. */
-+ if (block_size & 1)
-+ bitstream_read_byte(&is);
-+
-+ /* There may have been an E8 byte in the block. */
-+ may_have_e8_byte = 1;
-+ }
-+ out_next += block_size;
-+ }
-+
-+ /* Postprocess the data unless it cannot possibly contain E8 bytes. */
-+ if (may_have_e8_byte)
-+ lzx_postprocess(uncompressed_data, uncompressed_size);
-+
-+ return 0;
-+}
-+
-+struct lzx_decompressor *
-+lzx_allocate_decompressor(size_t max_block_size)
-+{
-+ u32 window_order;
-+ struct lzx_decompressor *d;
-+ u32 offset_slot;
-+
-+ /*
-+	 * ntfs3 uses LZX only with max_block_size == 0x8000, so this
-+	 * value cannot make lzx_get_window_order fail; the helper (and
-+	 * ilog2_ceil + bsrw behind it) could therefore be removed.
-+ */
-+ WARN_ON(max_block_size != 0x8000);
-+
-+ window_order = lzx_get_window_order(max_block_size);
-+ if (window_order == 0)
-+ return ERR_PTR(-EINVAL);
-+
-+ d = aligned_malloc(sizeof(*d), DECODE_TABLE_ALIGNMENT);
-+ if (!d)
-+ return NULL;
-+
-+ d->window_order = window_order;
-+ d->num_main_syms = lzx_get_num_main_syms(window_order);
-+
-+ /* Initialize 'd->extra_offset_bits_minus_aligned'. */
-+ STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) ==
-+ sizeof(lzx_extra_offset_bits));
-+ STATIC_ASSERT(sizeof(d->extra_offset_bits) ==
-+ sizeof(lzx_extra_offset_bits));
-+ memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits,
-+ sizeof(lzx_extra_offset_bits));
-+ for (offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
-+ offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++) {
-+ d->extra_offset_bits_minus_aligned[offset_slot] -=
-+ LZX_NUM_ALIGNED_OFFSET_BITS;
-+ }
-+
-+ return d;
-+}
-+
-+void
-+lzx_free_decompressor(struct lzx_decompressor *d)
-+{
-+ aligned_free(d);
-+}
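
A hypothetical caller of this API, assuming the prototypes from this patch are in scope; the function name and buffer parameters are illustrative, not taken from the patch (the real caller is the ntfs3 frame-read path):

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/types.h>

static int demo_lzx_decompress(const void *cmpr, size_t cmpr_len,
			       void *unc, size_t unc_len)
{
	/* NTFS compressed frames are always 0x8000 bytes, per the patch. */
	struct lzx_decompressor *d = lzx_allocate_decompressor(0x8000);
	int err = 0;

	if (IS_ERR_OR_NULL(d))
		return d ? PTR_ERR(d) : -ENOMEM;

	if (lzx_decompress(d, cmpr, cmpr_len, unc, unc_len)) {
		pr_warn("ntfs3: corrupt LZX frame\n");
		err = -EINVAL;
	}

	lzx_free_decompressor(d);
	return err;
}
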
-diff --git a/fs/ntfs3/lib/xpress_constants.h b/fs/ntfs3/lib/xpress_constants.h
-new file mode 100644
-index 000000000000..c96a03bf4554
---- /dev/null
-+++ b/fs/ntfs3/lib/xpress_constants.h
-@@ -0,0 +1,23 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+/*
-+ * xpress_constants.h
-+ *
-+ * Constants for the XPRESS compression format.
-+ */
-+
-+#ifndef _XPRESS_CONSTANTS_H
-+#define _XPRESS_CONSTANTS_H
-+
-+#define XPRESS_NUM_CHARS 256
-+#define XPRESS_NUM_SYMBOLS 512
-+#define XPRESS_MAX_CODEWORD_LEN 15
-+
-+#define XPRESS_END_OF_DATA 256
-+
-+#define XPRESS_MIN_OFFSET 1
-+#define XPRESS_MAX_OFFSET 65535
-+
-+#define XPRESS_MIN_MATCH_LEN 3
-+#define XPRESS_MAX_MATCH_LEN 65538
-+
-+#endif /* _XPRESS_CONSTANTS_H */
-diff --git a/fs/ntfs3/lib/xpress_decompress.c b/fs/ntfs3/lib/xpress_decompress.c
-new file mode 100644
-index 000000000000..af87a4a91852
---- /dev/null
-+++ b/fs/ntfs3/lib/xpress_decompress.c
-@@ -0,0 +1,165 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * xpress_decompress.c
-+ *
-+ * A decompressor for the XPRESS compression format (Huffman variant).
-+ */
-+
-+/*
-+ *
-+ * Copyright (C) 2012-2016 Eric Biggers
-+ *
-+ * This program is free software: you can redistribute it and/or modify it under
-+ * the terms of the GNU General Public License as published by the Free Software
-+ * Foundation, either version 2 of the License, or (at your option) any later
-+ * version.
-+ *
-+ * This program is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-+ * details.
-+ *
-+ * You should have received a copy of the GNU General Public License along with
-+ * this program. If not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+
-+/*
-+ * The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
-+ * That means it is fairly similar to LZX compression, but XPRESS is simpler, so
-+ * it is a little faster to compress and decompress.
-+ *
-+ * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
-+ * Xpress Compression Algorithm". In the MSDN library, it can currently be
-+ * found under Open Specifications => Protocols => Windows Protocols => Windows
-+ * Server Protocols => [MS-XCA] Xpress Compression Algorithm". The format in
-+ * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
-+ * (there apparently are some other versions of XPRESS as well).
-+ *
-+ * If you are already familiar with the LZ77 algorithm and Huffman coding, the
-+ * XPRESS format is fairly simple. The compressed data begins with 256 bytes
-+ * that contain 512 4-bit integers that are the lengths of the symbols in the
-+ * Huffman code used for match/literal headers. In contrast with more
-+ * complicated formats such as DEFLATE and LZX, this is the only Huffman code
-+ * that is used for the entirety of the XPRESS compressed data, and the codeword
-+ * lengths are not encoded with a pretree.
-+ *
-+ * The rest of the compressed data is Huffman-encoded symbols. Values 0 through
-+ * 255 represent the corresponding literal bytes. Values 256 through 511
-+ * represent matches and may require extra bits or bytes to be read to get the
-+ * match offset and match length.
-+ *
-+ * The trickiest part is probably the way in which literal bytes for match
-+ * lengths are interleaved in the bitstream.
-+ *
-+ * Also, a caveat--- according to Microsoft's documentation for XPRESS,
-+ *
-+ * "Some implementation of the decompression algorithm expect an extra
-+ * symbol to mark the end of the data. Specifically, some implementations
-+ * fail during decompression if the Huffman symbol 256 is not found after
-+ * the actual data."
-+ *
-+ * This is the case with Microsoft's implementation in WIMGAPI, for example. So
-+ * although our implementation doesn't currently check for this extra symbol,
-+ * compressors would be wise to add it.
-+ */
-+
-+#include "decompress_common.h"
-+#include "xpress_constants.h"
-+
-+/* This value is chosen for fast decompression. */
-+#define XPRESS_TABLEBITS 11
-+
-+struct xpress_decompressor {
-+ union {
-+ DECODE_TABLE(decode_table, XPRESS_NUM_SYMBOLS,
-+ XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
-+ u8 lens[XPRESS_NUM_SYMBOLS];
-+ };
-+ DECODE_TABLE_WORKING_SPACE(working_space, XPRESS_NUM_SYMBOLS,
-+ XPRESS_MAX_CODEWORD_LEN);
-+} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
-+
-+int
-+xpress_decompress(struct xpress_decompressor *__restrict d,
-+ const void *__restrict compressed_data, size_t compressed_size,
-+ void *__restrict uncompressed_data, size_t uncompressed_size)
-+{
-+ const u8 * const in_begin = compressed_data;
-+ u8 * const out_begin = uncompressed_data;
-+ u8 *out_next = out_begin;
-+ u8 * const out_end = out_begin + uncompressed_size;
-+ struct input_bitstream is;
-+ int i;
-+
-+ /* Read the Huffman codeword lengths. */
-+ if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
-+ return -1;
-+ for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
-+ d->lens[2 * i + 0] = in_begin[i] & 0xf;
-+ d->lens[2 * i + 1] = in_begin[i] >> 4;
-+ }
-+
-+ /* Build a decoding table for the Huffman code. */
-+ if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
-+ XPRESS_TABLEBITS, d->lens,
-+ XPRESS_MAX_CODEWORD_LEN,
-+ d->working_space))
-+ return -1;
-+
-+ /* Decode the matches and literals. */
-+
-+ init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
-+ compressed_size - XPRESS_NUM_SYMBOLS / 2);
-+
-+ while (out_next != out_end) {
-+ u32 sym;
-+ u32 log2_offset;
-+ u32 length;
-+ u32 offset;
-+
-+ sym = read_huffsym(&is, d->decode_table,
-+ XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
-+ if (sym < XPRESS_NUM_CHARS) {
-+ /* Literal */
-+ *out_next++ = sym;
-+ } else {
-+ /* Match */
-+ length = sym & 0xf;
-+ log2_offset = (sym >> 4) & 0xf;
-+
-+ bitstream_ensure_bits(&is, 16);
-+
-+ offset = ((u32)1 << log2_offset) |
-+ bitstream_pop_bits(&is, log2_offset);
-+
-+ if (length == 0xf) {
-+ length += bitstream_read_byte(&is);
-+ if (length == 0xf + 0xff)
-+ length = bitstream_read_u16(&is);
-+ }
-+ length += XPRESS_MIN_MATCH_LEN;
-+
-+ if (unlikely(lz_copy(length, offset,
-+ out_begin, out_next, out_end,
-+ XPRESS_MIN_MATCH_LEN)))
-+ return -1;
-+
-+ out_next += length;
-+ }
-+ }
-+ return 0;
-+}
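
A standalone sketch of the match-symbol layout decoded above: the low 4 bits hold a length slot and the next 4 bits give log2 of the offset range; the concrete symbol and extra offset bits are illustrative:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t sym = 256 + (5 << 4) + 3; /* log2_offset = 5, length slot = 3 */
	uint32_t length = sym & 0xf;
	uint32_t log2_offset = (sym >> 4) & 0xf;

	/* With 5 extra offset bits of, say, 0x0d the offset becomes
	 * (1 << 5) | 0x0d = 45; the final length adds XPRESS_MIN_MATCH_LEN.
	 */
	uint32_t offset = ((uint32_t)1 << log2_offset) | 0x0d;

	/* Prints: len=3 (final 6) offset=45 */
	printf("len=%u (final %u) offset=%u\n", (unsigned int)length,
	       (unsigned int)(length + 3), (unsigned int)offset);
	return 0;
}
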
-+
-+struct xpress_decompressor *
-+xpress_allocate_decompressor(void)
-+{
-+ return aligned_malloc(sizeof(struct xpress_decompressor),
-+ DECODE_TABLE_ALIGNMENT);
-+}
-+
-+void
-+xpress_free_decompressor(struct xpress_decompressor *d)
-+{
-+ aligned_free(d);
-+}
-diff --git a/fs/ntfs3/lznt.c b/fs/ntfs3/lznt.c
-new file mode 100644
-index 000000000000..edba953b754a
---- /dev/null
-+++ b/fs/ntfs3/lznt.c
-@@ -0,0 +1,452 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ *
-+ * Copyright (C) 2019-2020 Paragon Software GmbH, All rights reserved.
-+ *
-+ */
-+#include <linux/blkdev.h>
-+#include <linux/buffer_head.h>
-+#include <linux/fs.h>
-+#include <linux/nls.h>
-+
-+#include "debug.h"
-+#include "ntfs.h"
-+#include "ntfs_fs.h"
-+
-+// clang-format off
-+/* src buffer is zero */
-+#define LZNT_ERROR_ALL_ZEROS 1
-+#define LZNT_CHUNK_SIZE 0x1000
-+// clang-format on
-+
-+struct lznt_hash {
-+ const u8 *p1;
-+ const u8 *p2;
-+};
-+
-+struct lznt {
-+ const u8 *unc;
-+ const u8 *unc_end;
-+ const u8 *best_match;
-+ size_t max_len;
-+ bool std;
-+
-+ struct lznt_hash hash[LZNT_CHUNK_SIZE];
-+};
-+
-+static inline size_t get_match_len(const u8 *ptr, const u8 *end, const u8 *prev,
-+ size_t max_len)
-+{
-+ size_t len = 0;
-+
-+ while (ptr + len < end && ptr[len] == prev[len] && ++len < max_len)
-+ ;
-+ return len;
-+}
-+
-+static size_t longest_match_std(const u8 *src, struct lznt *ctx)
-+{
-+ size_t hash_index;
-+ size_t len1 = 0, len2 = 0;
-+ const u8 **hash;
-+
-+ hash_index =
-+ ((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) &
-+ (LZNT_CHUNK_SIZE - 1);
-+
-+ hash = &(ctx->hash[hash_index].p1);
-+
-+ if (hash[0] >= ctx->unc && hash[0] < src && hash[0][0] == src[0] &&
-+ hash[0][1] == src[1] && hash[0][2] == src[2]) {
-+ len1 = 3;
-+ if (ctx->max_len > 3)
-+ len1 += get_match_len(src + 3, ctx->unc_end,
-+ hash[0] + 3, ctx->max_len - 3);
-+ }
-+
-+ if (hash[1] >= ctx->unc && hash[1] < src && hash[1][0] == src[0] &&
-+ hash[1][1] == src[1] && hash[1][2] == src[2]) {
-+ len2 = 3;
-+ if (ctx->max_len > 3)
-+ len2 += get_match_len(src + 3, ctx->unc_end,
-+ hash[1] + 3, ctx->max_len - 3);
-+ }
-+
-+ /* Compare two matches and select the best one */
-+ if (len1 < len2) {
-+ ctx->best_match = hash[1];
-+ len1 = len2;
-+ } else {
-+ ctx->best_match = hash[0];
-+ }
-+
-+ hash[1] = hash[0];
-+ hash[0] = src;
-+ return len1;
-+}
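
The multiplicative hash above folds the next three input bytes and keeps 12 bits to index the 4096-entry table; a standalone copy for experimentation (stdint types assumed):

#include <stdint.h>
#include <stdio.h>

static unsigned int lznt_hash(const uint8_t *src)
{
	return ((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) &
	       (0x1000 - 1);	/* LZNT_CHUNK_SIZE - 1 */
}

int main(void)
{
	const uint8_t abc[3] = { 'a', 'b', 'c' };

	printf("hash(\"abc\") = 0x%03x\n", lznt_hash(abc));
	return 0;
}
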
-+
-+static size_t longest_match_best(const u8 *src, struct lznt *ctx)
-+{
-+ size_t max_len;
-+ const u8 *ptr;
-+
-+ if (ctx->unc >= src || !ctx->max_len)
-+ return 0;
-+
-+ max_len = 0;
-+ for (ptr = ctx->unc; ptr < src; ++ptr) {
-+ size_t len =
-+ get_match_len(src, ctx->unc_end, ptr, ctx->max_len);
-+ if (len >= max_len) {
-+ max_len = len;
-+ ctx->best_match = ptr;
-+ }
-+ }
-+
-+ return max_len >= 3 ? max_len : 0;
-+}
-+
-+static const size_t s_max_len[] = {
-+ 0x1002, 0x802, 0x402, 0x202, 0x102, 0x82, 0x42, 0x22, 0x12,
-+};
-+
-+static const size_t s_max_off[] = {
-+ 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
-+};
-+
-+static inline u16 make_pair(size_t offset, size_t len, size_t index)
-+{
-+ return ((offset - 1) << (12 - index)) |
-+ ((len - 3) & (((1 << (12 - index)) - 1)));
-+}
-+
-+static inline size_t parse_pair(u16 pair, size_t *offset, size_t index)
-+{
-+ *offset = 1 + (pair >> (12 - index));
-+ return 3 + (pair & ((1 << (12 - index)) - 1));
-+}
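
A standalone copy of the two pair helpers with a round-trip check: for split index k, the 16-bit pair keeps (offset - 1) in the top 4 + k bits and (len - 3) in the low 12 - k bits (the offset and length values are illustrative):

#include <stddef.h>
#include <stdio.h>

static unsigned short demo_make_pair(size_t offset, size_t len, size_t index)
{
	return ((offset - 1) << (12 - index)) |
	       ((len - 3) & ((1 << (12 - index)) - 1));
}

static size_t demo_parse_pair(unsigned short pair, size_t *offset, size_t index)
{
	*offset = 1 + (pair >> (12 - index));
	return 3 + (pair & ((1 << (12 - index)) - 1));
}

int main(void)
{
	size_t off;
	unsigned short p = demo_make_pair(7, 20, 0); /* index 0: offsets <= 16 */
	size_t len = demo_parse_pair(p, &off, 0);

	/* Prints: pair=0x6011 -> offset=7 len=20 */
	printf("pair=0x%04x -> offset=%zu len=%zu\n", (unsigned int)p, off, len);
	return 0;
}
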
-+
-+/*
-+ * compress_chunk
-+ *
-+ * returns one of three values:
-+ * 0 - ok, 'cmpr' contains 'cmpr_chunk_size' bytes of compressed data
-+ * 1 - input buffer is all zeros
-+ * -2 - the compressed buffer is too small to hold the compressed data
-+ */
-+static inline int compress_chunk(size_t (*match)(const u8 *, struct lznt *),
-+ const u8 *unc, const u8 *unc_end, u8 *cmpr,
-+ u8 *cmpr_end, size_t *cmpr_chunk_size,
-+ struct lznt *ctx)
-+{
-+ size_t cnt = 0;
-+ size_t idx = 0;
-+ const u8 *up = unc;
-+ u8 *cp = cmpr + 3;
-+ u8 *cp2 = cmpr + 2;
-+ u8 not_zero = 0;
-+	/* Control byte: each of its 8 bits flags a token (0 - literal byte as is, 1 - compressed pair) */
-+ u8 ohdr = 0;
-+ u8 *last;
-+ u16 t16;
-+
-+ if (unc + LZNT_CHUNK_SIZE < unc_end)
-+ unc_end = unc + LZNT_CHUNK_SIZE;
-+
-+ last = min(cmpr + LZNT_CHUNK_SIZE + sizeof(short), cmpr_end);
-+
-+ ctx->unc = unc;
-+ ctx->unc_end = unc_end;
-+ ctx->max_len = s_max_len[0];
-+
-+ while (up < unc_end) {
-+ size_t max_len;
-+
-+ while (unc + s_max_off[idx] < up)
-+ ctx->max_len = s_max_len[++idx];
-+
-+ // Find match
-+ max_len = up + 3 <= unc_end ? (*match)(up, ctx) : 0;
-+
-+ if (!max_len) {
-+ if (cp >= last)
-+ goto NotCompressed;
-+ not_zero |= *cp++ = *up++;
-+ } else if (cp + 1 >= last) {
-+ goto NotCompressed;
-+ } else {
-+ t16 = make_pair(up - ctx->best_match, max_len, idx);
-+ *cp++ = t16;
-+ *cp++ = t16 >> 8;
-+
-+ ohdr |= 1 << cnt;
-+ up += max_len;
-+ }
-+
-+ cnt = (cnt + 1) & 7;
-+ if (!cnt) {
-+ *cp2 = ohdr;
-+ ohdr = 0;
-+ cp2 = cp;
-+ cp += 1;
-+ }
-+ }
-+
-+ if (cp2 < last)
-+ *cp2 = ohdr;
-+ else
-+ cp -= 1;
-+
-+ *cmpr_chunk_size = cp - cmpr;
-+
-+ t16 = (*cmpr_chunk_size - 3) | 0xB000;
-+ cmpr[0] = t16;
-+ cmpr[1] = t16 >> 8;
-+
-+ return not_zero ? 0 : LZNT_ERROR_ALL_ZEROS;
-+
-+NotCompressed:
-+
-+ if ((cmpr + LZNT_CHUNK_SIZE + sizeof(short)) > last)
-+ return -2;
-+
-+ /*
-+	 * Copy uncompressed data
-+ * 0x3FFF == ((LZNT_CHUNK_SIZE + 2 - 3) | 0x3000)
-+ */
-+ cmpr[0] = 0xff;
-+ cmpr[1] = 0x3f;
-+
-+ memcpy(cmpr + sizeof(short), unc, LZNT_CHUNK_SIZE);
-+ *cmpr_chunk_size = LZNT_CHUNK_SIZE + sizeof(short);
-+
-+ return 0;
-+}
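
A sketch of the 16-bit chunk header written above: the low 12 bits hold (chunk size - 3), 0x3000 is a signature, and 0x8000 flags the chunk as compressed, hence 0xB000 here versus the fixed 0x3FFF for stored chunks (the size value is illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int hdr = (0x123 - 3) | 0xB000; /* as written by compress_chunk */

	/* Prints: compressed=1 payload=291 (0x123) */
	printf("compressed=%d payload=%u (0x%x)\n",
	       (hdr & 0x8000) != 0, 3 + (hdr & 0x0fff), 3 + (hdr & 0x0fff));
	return 0;
}
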
-+
-+static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr,
-+ const u8 *cmpr_end)
-+{
-+ u8 *up = unc;
-+ u8 ch = *cmpr++;
-+ size_t bit = 0;
-+ size_t index = 0;
-+ u16 pair;
-+ size_t offset, length;
-+
-+	/* Decompress while both pointers stay inside their buffers */
-+ while (up < unc_end && cmpr < cmpr_end) {
-+ /* Correct index */
-+ while (unc + s_max_off[index] < up)
-+ index += 1;
-+
-+ /* Check the current flag for zero */
-+ if (!(ch & (1 << bit))) {
-+ /* Just copy byte */
-+ *up++ = *cmpr++;
-+ goto next;
-+ }
-+
-+ /* Check for boundary */
-+ if (cmpr + 1 >= cmpr_end)
-+ return -EINVAL;
-+
-+ /* Read a short from little endian stream */
-+ pair = cmpr[1];
-+ pair <<= 8;
-+ pair |= cmpr[0];
-+
-+ cmpr += 2;
-+
-+ /* Translate packed information into offset and length */
-+ length = parse_pair(pair, &offset, index);
-+
-+ /* Check offset for boundary */
-+ if (unc + offset > up)
-+ return -EINVAL;
-+
-+ /* Truncate the length if necessary */
-+ if (up + length >= unc_end)
-+ length = unc_end - up;
-+
-+		/* Now we copy bytes. This is the heart of the LZ algorithm. */
-+ for (; length > 0; length--, up++)
-+ *up = *(up - offset);
-+
-+next:
-+ /* Advance flag bit value */
-+ bit = (bit + 1) & 7;
-+
-+ if (!bit) {
-+ if (cmpr >= cmpr_end)
-+ break;
-+
-+ ch = *cmpr++;
-+ }
-+ }
-+
-+ /* return the size of uncompressed data */
-+ return up - unc;
-+}
-+
-+/*
-+ * 0 - standard compression
-+ * !0 - best compression, requires a lot of CPU time
-+ */
-+struct lznt *get_lznt_ctx(int level)
-+{
-+ struct lznt *r = ntfs_alloc(
-+ level ? offsetof(struct lznt, hash) : sizeof(struct lznt), 1);
-+
-+ if (r)
-+ r->std = !level;
-+ return r;
-+}
-+
-+/*
-+ * compress_lznt
-+ *
-+ * Compresses "unc" into "cmpr" and returns:
-+ * +x - ok, 'cmpr' contains 'final_compressed_size' bytes of compressed data
-+ * 0 - input buffer is all zeros
-+ */
-+size_t compress_lznt(const void *unc, size_t unc_size, void *cmpr,
-+ size_t cmpr_size, struct lznt *ctx)
-+{
-+ int err;
-+ size_t (*match)(const u8 *src, struct lznt *ctx);
-+ u8 *p = cmpr;
-+ u8 *end = p + cmpr_size;
-+ const u8 *unc_chunk = unc;
-+ const u8 *unc_end = unc_chunk + unc_size;
-+ bool is_zero = true;
-+
-+ if (ctx->std) {
-+ match = &longest_match_std;
-+ memset(ctx->hash, 0, sizeof(ctx->hash));
-+ } else {
-+ match = &longest_match_best;
-+ }
-+
-+ /* compression cycle */
-+ for (; unc_chunk < unc_end; unc_chunk += LZNT_CHUNK_SIZE) {
-+ cmpr_size = 0;
-+ err = compress_chunk(match, unc_chunk, unc_end, p, end,
-+ &cmpr_size, ctx);
-+ if (err < 0)
-+ return unc_size;
-+
-+ if (is_zero && err != LZNT_ERROR_ALL_ZEROS)
-+ is_zero = false;
-+
-+ p += cmpr_size;
-+ }
-+
-+ if (p <= end - 2)
-+ p[0] = p[1] = 0;
-+
-+ return is_zero ? 0 : PtrOffset(cmpr, p);
-+}
-+
-+/*
-+ * decompress_lznt
-+ *
-+ * decompresses "cmpr" into "unc"
-+ */
-+ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc,
-+ size_t unc_size)
-+{
-+ const u8 *cmpr_chunk = cmpr;
-+ const u8 *cmpr_end = cmpr_chunk + cmpr_size;
-+ u8 *unc_chunk = unc;
-+ u8 *unc_end = unc_chunk + unc_size;
-+ u16 chunk_hdr;
-+
-+ if (cmpr_size < sizeof(short))
-+ return -EINVAL;
-+
-+ /* read chunk header */
-+ chunk_hdr = cmpr_chunk[1];
-+ chunk_hdr <<= 8;
-+ chunk_hdr |= cmpr_chunk[0];
-+
-+ /* loop through decompressing chunks */
-+ for (;;) {
-+ size_t chunk_size_saved;
-+ size_t unc_use;
-+ size_t cmpr_use = 3 + (chunk_hdr & (LZNT_CHUNK_SIZE - 1));
-+
-+ /* Check that the chunk actually fits the supplied buffer */
-+ if (cmpr_chunk + cmpr_use > cmpr_end)
-+ return -EINVAL;
-+
-+ /* First make sure the chunk contains compressed data */
-+ if (chunk_hdr & 0x8000) {
-+ /* Decompress a chunk and return if we get an error */
-+ ssize_t err =
-+ decompress_chunk(unc_chunk, unc_end,
-+ cmpr_chunk + sizeof(chunk_hdr),
-+ cmpr_chunk + cmpr_use);
-+ if (err < 0)
-+ return err;
-+ unc_use = err;
-+ } else {
-+ /* This chunk does not contain compressed data */
-+ unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end ?
-+ unc_end - unc_chunk :
-+ LZNT_CHUNK_SIZE;
-+
-+ if (cmpr_chunk + sizeof(chunk_hdr) + unc_use >
-+ cmpr_end) {
-+ return -EINVAL;
-+ }
-+
-+ memcpy(unc_chunk, cmpr_chunk + sizeof(chunk_hdr),
-+ unc_use);
-+ }
-+
-+ /* Advance pointers */
-+ cmpr_chunk += cmpr_use;
-+ unc_chunk += unc_use;
-+
-+ /* Check for the end of unc buffer */
-+ if (unc_chunk >= unc_end)
-+ break;
-+
-+		/* Proceed to the next chunk */
-+ if (cmpr_chunk > cmpr_end - 2)
-+ break;
-+
-+ chunk_size_saved = LZNT_CHUNK_SIZE;
-+
-+ /* read chunk header */
-+ chunk_hdr = cmpr_chunk[1];
-+ chunk_hdr <<= 8;
-+ chunk_hdr |= cmpr_chunk[0];
-+
-+ if (!chunk_hdr)
-+ break;
-+
-+ /* Check the size of unc buffer */
-+ if (unc_use < chunk_size_saved) {
-+ size_t t1 = chunk_size_saved - unc_use;
-+ u8 *t2 = unc_chunk + t1;
-+
-+ /* 'Zero' memory */
-+ if (t2 >= unc_end)
-+ break;
-+
-+ memset(unc_chunk, 0, t1);
-+ unc_chunk = t2;
-+ }
-+ }
-+
-+ /* Check compression boundary */
-+ if (cmpr_chunk > cmpr_end)
-+ return -EINVAL;
-+
-+ /*
-+	 * The uncompressed size is just the difference between the
-+	 * current pointer and the original one
-+ */
-+ return PtrOffset(unc, unc_chunk);
-+}
---
-2.25.4
-
-
diff --git a/PATCH-v14-01-10-fs-ntfs3-Add-headers-and-misc-files.patch b/PATCH-v16-01-10-fs-ntfs3-Add-headers-and-misc-files.patch
similarity index 93%
rename from PATCH-v14-01-10-fs-ntfs3-Add-headers-and-misc-files.patch
rename to PATCH-v16-01-10-fs-ntfs3-Add-headers-and-misc-files.patch
index df139fc..7439fa5 100644
--- a/PATCH-v14-01-10-fs-ntfs3-Add-headers-and-misc-files.patch
+++ b/PATCH-v16-01-10-fs-ntfs3-Add-headers-and-misc-files.patch
@@ -1,12 +1,78 @@
+From mboxrd@z Thu Jan 1 00:00:00 1970
+Return-Path: <linux-kernel-owner@kernel.org>
+X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
+ aws-us-west-2-korg-lkml-1.web.codeaurora.org
+X-Spam-Level:
+X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
+ DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,
+ INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
+ USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0
+Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
+ by smtp.lore.kernel.org (Postfix) with ESMTP id C2230C433E6
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:44 +0000 (UTC)
+Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
+ by mail.kernel.org (Postfix) with ESMTP id 9AC4A23331
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:44 +0000 (UTC)
+Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
+ id S1729275AbgLYNyc (ORCPT
+ <rfc822;linux-kernel@archiver.kernel.org>);
+ Fri, 25 Dec 2020 08:54:32 -0500
+Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:50618 "EHLO
+ relaydlg-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
+ by vger.kernel.org with ESMTP id S1726144AbgLYNyW (ORCPT
+ <rfc822;linux-kernel@vger.kernel.org>);
+ Fri, 25 Dec 2020 08:54:22 -0500
+Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
+ by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id 93515808D7;
+ Fri, 25 Dec 2020 16:53:25 +0300 (MSK)
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
+ d=paragon-software.com; s=mail; t=1608904405;
+ bh=KjDa8GDaOX+4G1rQhTYHjgpfJ0JTjW5Qgv/7EaeXMK4=;
+ h=From:To:CC:Subject:Date:In-Reply-To:References;
+ b=UuDdhzPEV4QtgqyIPLb0dk3Umxn2sBzCB0/q1zBYxeZ2H+6+OLatvJhS6sDJS2b6X
+ zFB1t1I3KubTL8EYNy6OG7kMc6KEeTQQ52963L1ArEMjmmmrta890u2/5j7UTT37/R
+ zjb/5lVdwl+j0agPYqm5BD9xeAbwKuCb2lAcTz2I=
+Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
+ vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
+ (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:25 +0300
+From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+To: <linux-fsdevel@vger.kernel.org>
+CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
+ <pali@kernel.org>, <dsterba@suse.cz>, <aaptel@suse.com>,
+ <willy@infradead.org>, <rdunlap@infradead.org>, <joe@perches.com>,
+ <mark@harmstone.com>, <nborisov@suse.com>,
+ <linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
+ <dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
+ Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 01/10] fs/ntfs3: Add headers and misc files
+Date: Fri, 25 Dec 2020 16:51:10 +0300
+Message-ID: <20201225135119.3666763-2-almaz.alexandrovich@paragon-software.com>
+X-Mailer: git-send-email 2.25.4
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+MIME-Version: 1.0
+Content-Transfer-Encoding: 8bit
+Content-Type: text/plain
+X-Originating-IP: [172.30.114.105]
+X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
+ vdlg-exch-02.paragon-software.com (172.30.1.105)
+Precedence: bulk
+List-ID: <linux-kernel.vger.kernel.org>
+X-Mailing-List: linux-kernel@vger.kernel.org
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-2-almaz.alexandrovich@paragon-software.com/>
+List-Archive: <https://lore.kernel.org/lkml/>
+List-Post: <mailto:linux-kernel@vger.kernel.org>
+
This adds headers and misc files
Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
---
fs/ntfs3/debug.h | 61 +++
fs/ntfs3/ntfs.h | 1237 ++++++++++++++++++++++++++++++++++++++++++++
- fs/ntfs3/ntfs_fs.h | 1075 ++++++++++++++++++++++++++++++++++++++
+ fs/ntfs3/ntfs_fs.h | 1050 +++++++++++++++++++++++++++++++++++++
fs/ntfs3/upcase.c | 77 +++
- 4 files changed, 2450 insertions(+)
+ 4 files changed, 2425 insertions(+)
create mode 100644 fs/ntfs3/debug.h
create mode 100644 fs/ntfs3/ntfs.h
create mode 100644 fs/ntfs3/ntfs_fs.h
@@ -1324,10 +1390,10 @@ index 000000000000..9b1a9be80529
+// clang-format on
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
new file mode 100644
-index 000000000000..402ba820f99d
+index 000000000000..6bdd2d35a1d7
--- /dev/null
+++ b/fs/ntfs3/ntfs_fs.h
-@@ -0,0 +1,1075 @@
+@@ -0,0 +1,1050 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
@@ -1589,14 +1655,12 @@ index 000000000000..402ba820f99d
+ } objid;
+
+ struct {
-+ /*
-+ * protect 'lznt/xpress/lzx'
-+ * Should we use different spinlocks for each ctx?
-+ */
-+ spinlock_t lock;
++ struct mutex mtx_lznt;
+ struct lznt *lznt;
+#ifdef CONFIG_NTFS3_LZX_XPRESS
++ struct mutex mtx_xpress;
+ struct xpress_decompressor *xpress;
++ struct mutex mtx_lzx;
+ struct lzx_decompressor *lzx;
+#endif
+ } compress;
@@ -1640,7 +1704,7 @@ index 000000000000..402ba820f99d
+ * Range [i_valid - inode->i_size) - contains 0
+ * Usually i_valid <= inode->i_size
+ */
-+ loff_t i_valid;
++ u64 i_valid;
+ struct timespec64 i_crtime;
+
+ struct mutex ni_lock;
@@ -1729,6 +1793,8 @@ index 000000000000..402ba820f99d
+ CLST frame, CLST *clst_data);
+int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size,
+ u64 new_valid);
++int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes);
++int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes);
+
+/* functions from attrlist.c*/
+void al_destroy(struct ntfs_inode *ni);
@@ -1778,8 +1844,7 @@ index 000000000000..402ba820f99d
+void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn,
+ CLST len);
+int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync);
-+void ntfs_truncate_blocks(struct inode *inode, loff_t offset);
-+int ntfs_setattr(struct dentry *dentry, struct iattr *attr);
++int ntfs3_setattr(struct dentry *dentry, struct iattr *attr);
+int ntfs_file_open(struct inode *inode, struct file *file);
+int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ __u64 start, __u64 len);
@@ -1801,7 +1866,8 @@ index 000000000000..402ba820f99d
+ u8 name_len, const CLST *vcn,
+ struct mft_inode **mi);
+struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr,
-+ struct ATTR_LIST_ENTRY **le);
++ struct ATTR_LIST_ENTRY **le,
++ struct mft_inode **mi);
+struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len, CLST vcn,
+ struct mft_inode **pmi);
@@ -1829,7 +1895,6 @@ index 000000000000..402ba820f99d
+ struct ATTR_LIST_ENTRY **entry);
+struct ATTR_FILE_NAME *ni_fname_type(struct ntfs_inode *ni, u8 name_type,
+ struct ATTR_LIST_ENTRY **entry);
-+u16 ni_fnames_count(struct ntfs_inode *ni);
+int ni_new_attr_flags(struct ntfs_inode *ni, enum FILE_ATTRIBUTE new_fa);
+enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
+ void *buffer);
@@ -1951,7 +2016,7 @@ index 000000000000..402ba820f99d
+int reset_log_file(struct inode *inode);
+int ntfs_get_block(struct inode *inode, sector_t vbn,
+ struct buffer_head *bh_result, int create);
-+int ntfs_write_inode(struct inode *inode, struct writeback_control *wbc);
++int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc);
+int ntfs_sync_inode(struct inode *inode);
+int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
+ struct inode *i2);
@@ -1963,7 +2028,6 @@ index 000000000000..402ba820f99d
+int ntfs_link_inode(struct inode *inode, struct dentry *dentry);
+int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry);
+void ntfs_evict_inode(struct inode *inode);
-+int ntfs_readpage(struct file *file, struct page *page);
+extern const struct inode_operations ntfs_link_inode_operations;
+extern const struct address_space_operations ntfs_aops;
+extern const struct address_space_operations ntfs_aops_cmpr;
@@ -1971,7 +2035,7 @@ index 000000000000..402ba820f99d
+/* globals from name_i.c*/
+int fill_name_de(struct ntfs_sb_info *sbi, void *buf, const struct qstr *name,
+ const struct cpu_str *uni);
-+struct dentry *ntfs_get_parent(struct dentry *child);
++struct dentry *ntfs3_get_parent(struct dentry *child);
+
+extern const struct inode_operations ntfs_dir_inode_operations;
+
@@ -2027,6 +2091,7 @@ index 000000000000..402ba820f99d
+bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *Index);
+bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len,
+ bool is_mft);
++bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len);
+bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn,
+ CLST *lcn, CLST *len);
+bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn);
@@ -2058,13 +2123,11 @@ index 000000000000..402ba820f99d
+{
+ return wnd->total_zeroes;
+}
-+void wnd_trace(struct wnd_bitmap *wnd);
-+void wnd_trace_tree(struct wnd_bitmap *wnd, u32 nExtents, const char *Hint);
-+int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nBits);
-+int wnd_set_free(struct wnd_bitmap *wnd, size_t FirstBit, size_t Bits);
-+int wnd_set_used(struct wnd_bitmap *wnd, size_t FirstBit, size_t Bits);
-+bool wnd_is_free(struct wnd_bitmap *wnd, size_t FirstBit, size_t Bits);
-+bool wnd_is_used(struct wnd_bitmap *wnd, size_t FirstBit, size_t Bits);
++int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits);
++int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits);
++int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits);
++bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits);
++bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits);
+
+/* Possible values for 'flags' 'wnd_find' */
+#define BITMAP_FIND_MARK_AS_USED 0x01
@@ -2082,12 +2145,18 @@ index 000000000000..402ba820f99d
+ const u16 *upcase);
+
+/* globals from xattr.c */
++#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+struct posix_acl *ntfs_get_acl(struct inode *inode, int type);
+int ntfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
++int ntfs_init_acl(struct inode *inode, struct inode *dir);
++#else
++#define ntfs_get_acl NULL
++#define ntfs_set_acl NULL
++#endif
++
+int ntfs_acl_chmod(struct inode *inode);
+int ntfs_permission(struct inode *inode, int mask);
+ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-+int ntfs_init_acl(struct inode *inode, struct inode *dir);
+extern const struct xattr_handler *ntfs_xattr_handlers[];
+
+/* globals from lznt.c */
@@ -2243,15 +2312,6 @@ index 000000000000..402ba820f99d
+ return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+}
+
-+/* calculates ((bytes + frame_size - 1)/frame_size)*frame_size; */
-+static inline u64 ntfs_up_frame(const struct ntfs_sb_info *sbi, u64 bytes,
-+ u8 c_unit)
-+{
-+ u32 bytes_per_frame = 1u << (c_unit + sbi->cluster_bits);
-+
-+ return (bytes + bytes_per_frame - 1) & ~(u64)(bytes_per_frame - 1);
-+}
-+
+static inline struct buffer_head *ntfs_bread(struct super_block *sb,
+ sector_t block)
+{
@@ -2384,25 +2444,6 @@ index 000000000000..402ba820f99d
+{
+ *var = cpu_to_le64(le64_to_cpu(*var) - val);
+}
-+
-+#ifdef CONFIG_NTFS3_LZX_XPRESS
-+/* globals from lib/decompress_xpress.c */
-+struct xpress_decompressor *xpress_allocate_decompressor(void);
-+void xpress_free_decompressor(struct xpress_decompressor *d);
-+int xpress_decompress(struct xpress_decompressor *__restrict d,
-+ const void *__restrict compressed_data,
-+ size_t compressed_size,
-+ void *__restrict uncompressed_data,
-+ size_t uncompressed_size);
-+
-+/* globals from lib/decompress_lzx.c */
-+struct lzx_decompressor *lzx_allocate_decompressor(size_t max_block_size);
-+void lzx_free_decompressor(struct lzx_decompressor *d);
-+int lzx_decompress(struct lzx_decompressor *__restrict d,
-+ const void *__restrict compressed_data,
-+ size_t compressed_size, void *__restrict uncompressed_data,
-+ size_t uncompressed_size);
-+#endif
diff --git a/fs/ntfs3/upcase.c b/fs/ntfs3/upcase.c
new file mode 100644
index 000000000000..87b65f9ccc16
diff --git a/PATCH-v14-02-10-fs-ntfs3-Add-initialization-of-super-block.patch b/PATCH-v16-02-10-fs-ntfs3-Add-initialization-of-super-block.patch
similarity index 98%
rename from PATCH-v14-02-10-fs-ntfs3-Add-initialization-of-super-block.patch
rename to PATCH-v16-02-10-fs-ntfs3-Add-initialization-of-super-block.patch
index cca500b..53e58ac 100644
--- a/PATCH-v14-02-10-fs-ntfs3-Add-initialization-of-super-block.patch
+++ b/PATCH-v16-02-10-fs-ntfs3-Add-initialization-of-super-block.patch
@@ -8,34 +8,34 @@ X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id 47B4FC1B0D9
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:26 +0000 (UTC)
+ by smtp.lore.kernel.org (Postfix) with ESMTP id 7858FC4332D
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:26 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id 12D9E22C9C
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:26 +0000 (UTC)
+ by mail.kernel.org (Postfix) with ESMTP id 39A4F2333B
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:26 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730807AbgLDPs7 (ORCPT
+ id S1729186AbgLYNyY (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:48:59 -0500
-Received: from relayfre-01.paragon-software.com ([176.12.100.13]:55210 "EHLO
+ Fri, 25 Dec 2020 08:54:24 -0500
+Received: from relayfre-01.paragon-software.com ([176.12.100.13]:47240 "EHLO
relayfre-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730497AbgLDPs4 (ORCPT
+ by vger.kernel.org with ESMTP id S1726164AbgLYNyU (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:48:56 -0500
+ Fri, 25 Dec 2020 08:54:20 -0500
Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relayfre-01.paragon-software.com (Postfix) with ESMTPS id 2F7AF1D2D;
- Fri, 4 Dec 2020 18:48:07 +0300 (MSK)
+ by relayfre-01.paragon-software.com (Postfix) with ESMTPS id 378001D60;
+ Fri, 25 Dec 2020 16:53:26 +0300 (MSK)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096887;
- bh=UzZrLzB+qcyDrnPJLsujLosIxI4A6el31A6itemIrFE=;
+ d=paragon-software.com; s=mail; t=1608904406;
+ bh=vutJMCWewDkU0CfOQtx+/cTQ7kZDXsADiZSdaCyB/+M=;
h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=NTGINcyEx+Shnt+gQMmOT5Li4+4zwng30xpIFSZUnjJaebsJMmCmTnJNWCThsQZI6
- 0+NN+jb+D5K1b2zI9zg0oMM/EAYknZw+QIj2vnfICk5tg1C0N3XR+ku5pzNI1wcCxN
- rRDow7j5eG97TBIsS4Nclv0XpRJZ7gGDYPkDmGIg=
+ b=WYdieos2IvqQBfbwyTuM3vp9SeeVfjnCAnVI1dYGB4Wpv6ATgQyes3y7n2P975Xsd
+ NnNkoUCApjGSKTufFKqZh3ShTakHMyyg9PLlQx7h7jquUGr9QRm5Zb86hnklgILYfo
+ arljlCPQKYkFJR8H6UFqbD5/OZd+ze6gtaTBlUFA=
Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:06 +0300
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:25 +0300
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
To: <linux-fsdevel@vger.kernel.org>
CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
@@ -45,12 +45,12 @@ CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
<linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
<dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 02/10] fs/ntfs3: Add initialization of super block
-Date: Fri, 4 Dec 2020 18:45:52 +0300
-Message-ID: <20201204154600.1546096-3-almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 02/10] fs/ntfs3: Add initialization of super block
+Date: Fri, 25 Dec 2020 16:51:11 +0300
+Message-ID: <20201225135119.3666763-3-almaz.alexandrovich@paragon-software.com>
X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
@@ -60,7 +60,7 @@ X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-3-almaz.alexandrovich@paragon-software.com/>
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-3-almaz.alexandrovich@paragon-software.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
@@ -68,11 +68,11 @@ This adds initialization of super block
Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
---
- fs/ntfs3/fsntfs.c | 2528 ++++++++++++++++++++++++++++++++++++++++++
+ fs/ntfs3/fsntfs.c | 2527 ++++++++++++++++++++++++++++++++++++++++++
fs/ntfs3/index.c | 2665 +++++++++++++++++++++++++++++++++++++++++++++
- fs/ntfs3/inode.c | 2056 ++++++++++++++++++++++++++++++++++
- fs/ntfs3/super.c | 1464 +++++++++++++++++++++++++
- 4 files changed, 8713 insertions(+)
+ fs/ntfs3/inode.c | 2061 +++++++++++++++++++++++++++++++++++
+ fs/ntfs3/super.c | 1477 +++++++++++++++++++++++++
+ 4 files changed, 8730 insertions(+)
create mode 100644 fs/ntfs3/fsntfs.c
create mode 100644 fs/ntfs3/index.c
create mode 100644 fs/ntfs3/inode.c
@@ -80,10 +80,10 @@ Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
new file mode 100644
-index 000000000000..d7614749dd7e
+index 000000000000..eb972616c426
--- /dev/null
+++ b/fs/ntfs3/fsntfs.c
-@@ -0,0 +1,2528 @@
+@@ -0,0 +1,2527 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
@@ -1623,8 +1623,7 @@ index 000000000000..d7614749dd7e
+
+ if (add + off == PAGE_SIZE) {
+ page_idx += 1;
-+ if (page_idx >= nr_pages) {
-+ WARN_ON(1);
++ if (WARN_ON(page_idx >= nr_pages)) {
+ err = -EINVAL;
+ goto out;
+ }
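
The hunk above folds the check and the warning together: the kernel's WARN_ON() evaluates to its condition, so `if (WARN_ON(cond))` both logs a stack trace and takes the error branch. A rough user-space stand-in (statement expressions are a GCC extension, as in the kernel):

    #include <stdio.h>

    #define WARN_ON(cond) ({                                              \
        int __c = !!(cond);                                               \
        if (__c)                                                          \
            fprintf(stderr, "WARNING at %s:%d\n", __FILE__, __LINE__);    \
        __c;                                                              \
    })

    int main(void)
    {
        int page_idx = 5, nr_pages = 4;

        if (WARN_ON(page_idx >= nr_pages))
            return 1;   /* the driver returns -EINVAL here */
        return 0;
    }
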
@@ -2614,7 +2613,7 @@ index 000000000000..d7614749dd7e
+}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
new file mode 100644
-index 000000000000..b7caeb3da500
+index 000000000000..64855f9c0ad4
--- /dev/null
+++ b/fs/ntfs3/index.c
@@ -0,0 +1,2665 @@
@@ -2858,8 +2857,8 @@ index 000000000000..b7caeb3da500
+ }
+
+ data_size = le64_to_cpu(b->nres.data_size);
-+ if (off >= data_size) {
-+ WARN_ON(1);
++ if (WARN_ON(off >= data_size)) {
++ /* looks like a filesystem error */
+ return -EINVAL;
+ }
+
@@ -5285,10 +5284,10 @@ index 000000000000..b7caeb3da500
+}
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
new file mode 100644
-index 000000000000..13116e68f9f6
+index 000000000000..2b64fe8c1377
--- /dev/null
+++ b/fs/ntfs3/inode.c
-@@ -0,0 +1,2056 @@
+@@ -0,0 +1,2061 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
@@ -5399,12 +5398,12 @@ index 000000000000..13116e68f9f6
+
+ /*
+ * to reduce tab pressure use goto instead of
-+ * while( (attr = ni_enum_attr_ex(ni, attr, &le) ))
++ * while( (attr = ni_enum_attr_ex(ni, attr, &le, NULL) ))
+ */
+next_attr:
+ run = NULL;
+ err = -EINVAL;
-+ attr = ni_enum_attr_ex(ni, attr, &le);
++ attr = ni_enum_attr_ex(ni, attr, &le, NULL);
+ if (!attr)
+ goto end_enum;
+
@@ -5968,7 +5967,7 @@ index 000000000000..13116e68f9f6
+ return generic_block_bmap(mapping, block, ntfs_get_block_bmap);
+}
+
-+int ntfs_readpage(struct file *file, struct page *page)
++static int ntfs_readpage(struct file *file, struct page *page)
+{
+ int err;
+ struct address_space *mapping = page->mapping;
@@ -6319,7 +6318,7 @@ index 000000000000..13116e68f9f6
+ return err;
+}
+
-+int ntfs_write_inode(struct inode *inode, struct writeback_control *wbc)
++int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ return _ni_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
@@ -6876,11 +6875,14 @@ index 000000000000..13116e68f9f6
+
+ inode->i_mode = mode;
+
++#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ if (!is_link && (sb->s_flags & SB_POSIXACL)) {
+ err = ntfs_init_acl(inode, dir);
+ if (err)
+ goto out6;
-+ } else {
++ } else
++#endif
++ {
+ inode->i_flags |= S_NOSEC;
+ }
+
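
The `} else` / `#endif` / `{` sequence above is deliberate: when CONFIG_NTFS3_FS_POSIX_ACL is off, the whole if/else head drops out of the preprocessed source and only the brace-enclosed S_NOSEC block remains, so both configurations compile with balanced braces. A toy demonstration:

    #include <stdio.h>

    /* #define CONFIG_NTFS3_FS_POSIX_ACL */

    int main(void)
    {
        int posixacl = 1;

        (void)posixacl;   /* unused when the feature is compiled out */
    #ifdef CONFIG_NTFS3_FS_POSIX_ACL
        if (posixacl) {
            puts("init ACLs");
        } else
    #endif
        {
            puts("mark S_NOSEC");   /* reached in both builds */
        }
        return 0;
    }
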
@@ -7254,8 +7256,10 @@ index 000000000000..13116e68f9f6
+ goto out;
+
+ default:
-+ if (IsReparseTagMicrosoft(rp->ReparseTag))
++ if (IsReparseTagMicrosoft(rp->ReparseTag)) {
++ /* unknown Microsoft Tag */
+ goto out;
++ }
+ if (!IsReparseTagNameSurrogate(rp->ReparseTag) ||
+ i_size <= sizeof(struct REPARSE_POINT)) {
+ goto out;
@@ -7323,7 +7327,7 @@ index 000000000000..13116e68f9f6
+
+const struct inode_operations ntfs_link_inode_operations = {
+ .get_link = ntfs_get_link,
-+ .setattr = ntfs_setattr,
++ .setattr = ntfs3_setattr,
+ .listxattr = ntfs_listxattr,
+ .permission = ntfs_permission,
+ .get_acl = ntfs_get_acl,
@@ -7347,10 +7351,10 @@ index 000000000000..13116e68f9f6
+};
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
new file mode 100644
-index 000000000000..6b53001052a6
+index 000000000000..f53c3f770826
--- /dev/null
+++ b/fs/ntfs3/super.c
-@@ -0,0 +1,1464 @@
+@@ -0,0 +1,1477 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
@@ -7385,6 +7389,9 @@ index 000000000000..6b53001052a6
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
++#ifdef CONFIG_NTFS3_LZX_XPRESS
++#include "lib/lib.h"
++#endif
+
+#ifdef CONFIG_PRINTK
+/*
@@ -7664,8 +7671,13 @@ index 000000000000..6b53001052a6
+ opts->nohidden = 1;
+ break;
+ case Opt_acl:
++#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ sb->s_flags |= SB_POSIXACL;
+ break;
++#else
++ ntfs_err(sb, "support for ACL not compiled in!");
++ return -EINVAL;
++#endif
+ case Opt_noatime:
+ sb->s_flags |= SB_NOATIME;
+ break;
@@ -7973,7 +7985,7 @@ index 000000000000..6b53001052a6
+ .show_options = ntfs_show_options,
+ .sync_fs = ntfs_sync_fs,
+ .remount_fs = ntfs_remount,
-+ .write_inode = ntfs_write_inode,
++ .write_inode = ntfs3_write_inode,
+};
+
+static struct inode *ntfs_export_get_inode(struct super_block *sb, u64 ino,
@@ -8022,7 +8034,7 @@ index 000000000000..6b53001052a6
+static const struct export_operations ntfs_export_ops = {
+ .fh_to_dentry = ntfs_fh_to_dentry,
+ .fh_to_parent = ntfs_fh_to_parent,
-+ .get_parent = ntfs_get_parent,
++ .get_parent = ntfs3_get_parent,
+ .commit_metadata = ntfs_nfs_commit_metadata,
+};
+
@@ -8083,6 +8095,7 @@ index 000000000000..6b53001052a6
+ goto out;
+ }
+
++ /* cluster size: 512, 1K, 2K, 4K, ... 2M */
+ sct_per_clst = true_sectors_per_clst(boot);
+ if (!is_power_of2(sct_per_clst))
+ goto out;
@@ -8296,7 +8309,11 @@ index 000000000000..6b53001052a6
+ if (err)
+ goto out;
+
-+ spin_lock_init(&sbi->compress.lock);
++ mutex_init(&sbi->compress.mtx_lznt);
++#ifdef CONFIG_NTFS3_LZX_XPRESS
++ mutex_init(&sbi->compress.mtx_xpress);
++ mutex_init(&sbi->compress.mtx_lzx);
++#endif
+
+ /*
+ * Load $Volume. This should be done before $LogFile
@@ -8585,7 +8602,7 @@ index 000000000000..6b53001052a6
+ }
+
+ for (idx = 0; idx < (0x10000 * sizeof(short) >> PAGE_SHIFT); idx++) {
-+ const u16 *src;
++ const __le16 *src;
+ u16 *dst = Add2Ptr(upcase, idx << PAGE_SHIFT);
+ struct page *page = ntfs_map_page(inode->i_mapping, idx);
+
diff --git a/PATCH-v14-03-10-fs-ntfs3-Add-bitmap.patch b/PATCH-v16-03-10-fs-ntfs3-Add-bitmap.patch
similarity index 96%
rename from PATCH-v14-03-10-fs-ntfs3-Add-bitmap.patch
rename to PATCH-v16-03-10-fs-ntfs3-Add-bitmap.patch
index 9e524cd..0ac2d48 100644
--- a/PATCH-v14-03-10-fs-ntfs3-Add-bitmap.patch
+++ b/PATCH-v16-03-10-fs-ntfs3-Add-bitmap.patch
@@ -8,34 +8,34 @@ X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id A9A46C0018C
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:25 +0000 (UTC)
+ by smtp.lore.kernel.org (Postfix) with ESMTP id 901F5C433E9
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:55:12 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id 82E8B22C97
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:25 +0000 (UTC)
+ by mail.kernel.org (Postfix) with ESMTP id 6863F22795
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:55:12 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730781AbgLDPsx (ORCPT
+ id S1729401AbgLYNyx (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:48:53 -0500
-Received: from relayfre-01.paragon-software.com ([176.12.100.13]:55234 "EHLO
+ Fri, 25 Dec 2020 08:54:53 -0500
+Received: from relayfre-01.paragon-software.com ([176.12.100.13]:47266 "EHLO
relayfre-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730509AbgLDPsv (ORCPT
+ by vger.kernel.org with ESMTP id S1726179AbgLYNyK (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:48:51 -0500
+ Fri, 25 Dec 2020 08:54:10 -0500
Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relayfre-01.paragon-software.com (Postfix) with ESMTPS id 67AB81D3E;
- Fri, 4 Dec 2020 18:48:07 +0300 (MSK)
+ by relayfre-01.paragon-software.com (Postfix) with ESMTPS id 6CF291D6E;
+ Fri, 25 Dec 2020 16:53:26 +0300 (MSK)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096887;
+ d=paragon-software.com; s=mail; t=1608904406;
bh=dMWweGuH2ggisHm6Gp1PMQCBWAZCxVnHdGXjB2AeV0Y=;
h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=uSuHJ8tl9oZQgZ+JS6B7CuROGa0hu4BQRKP5BQ8/z5pNdgHFz8vcJkH9AJNuC2cZy
- sfy0xKno+DFTnXz7BQaiAuwVA+t5LlX/cxyUiQIOyyPF58iZtLzm8CQNt/eMojJL0D
- MQgy+HYyV1agrcnBUFDhKOGHE64OK7YHc/1NzG4A=
+ b=Ej3/trew1pMiqz7yiLe7YiakjxUZmH0flqKMrXMrBOFBU4yuor+ig2c6QMua+hniG
+ rLOj0YIPuu5G28Iv7M87lonimh090CPLQAG3+F9iXRfc0Waera2NECMAUTcn141Q+U
+ AaKyNLnTfLX1VcPNCf2mordmXo/jUQaX04HZDu0s=
Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:06 +0300
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:25 +0300
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
To: <linux-fsdevel@vger.kernel.org>
CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
@@ -45,12 +45,12 @@ CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
<linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
<dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 03/10] fs/ntfs3: Add bitmap
-Date: Fri, 4 Dec 2020 18:45:53 +0300
-Message-ID: <20201204154600.1546096-4-almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 03/10] fs/ntfs3: Add bitmap
+Date: Fri, 25 Dec 2020 16:51:12 +0300
+Message-ID: <20201225135119.3666763-4-almaz.alexandrovich@paragon-software.com>
X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
@@ -60,7 +60,7 @@ X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-4-almaz.alexandrovich@paragon-software.com/>
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-4-almaz.alexandrovich@paragon-software.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
diff --git a/PATCH-v14-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch b/PATCH-v16-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch
similarity index 94%
rename from PATCH-v14-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch
rename to PATCH-v16-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch
index 2433dbc..15351af 100644
--- a/PATCH-v14-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch
+++ b/PATCH-v16-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch
@@ -5,37 +5,37 @@ X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
X-Spam-Level:
X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,
- INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT
- autolearn=ham autolearn_force=no version=3.4.0
+ INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
+ USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id D1F1AC2BBCA
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:26 +0000 (UTC)
+ by smtp.lore.kernel.org (Postfix) with ESMTP id EA709C43381
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:44 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id B0D9F22C97
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:26 +0000 (UTC)
+ by mail.kernel.org (Postfix) with ESMTP id B9F4B2312A
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:44 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730876AbgLDPtR (ORCPT
+ id S1729303AbgLYNyh (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:49:17 -0500
-Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:37114 "EHLO
+ Fri, 25 Dec 2020 08:54:37 -0500
+Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:50639 "EHLO
relaydlg-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730722AbgLDPtQ (ORCPT
+ by vger.kernel.org with ESMTP id S1726232AbgLYNyf (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:49:16 -0500
+ Fri, 25 Dec 2020 08:54:35 -0500
Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id A58178225A;
- Fri, 4 Dec 2020 18:48:07 +0300 (MSK)
+ by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id B25CF821A3;
+ Fri, 25 Dec 2020 16:53:26 +0300 (MSK)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096887;
- bh=6RhajCZp7IpgFnc6Ghg38G1VDW37O1OVRf7DuxPR17I=;
+ d=paragon-software.com; s=mail; t=1608904406;
+ bh=/2xHIb9Bhj1Sg9lNs1CpT2KwK501VjPJYf6biEpX6cg=;
h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=AQaE42d8Se9kut1Nc4F7kVGAT9PXU2T1AkqS9Oxu7t0C5E44aUjliWN13t/KG7zMp
- nMUBXh49Y4mW4N4pFCgFMJbCydV/j0J4Yv+ZTsrTpJxGcTu9FzjTpsdrHmrR82N7SE
- EApu9iXgpzw7nQ81sgsJrCh1SLcjrnO9OnOqDDh8=
+ b=moMYpIhJdkEAGFXrnUZj7tMX/Hruf4Sf785VzGDRa7kiDKlpB1HYg9B1CJbeC/sCo
+ U9EcMAidrORAYyYvp8v0AJkV/wPtynNFdMTMPA1E6h7wZhn0398mZZYhig84lc9tNL
+ uMEWN/9eoSRO50OyC3XxF1jdJUsUGNpLm9N00ZPw=
Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:06 +0300
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:25 +0300
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
To: <linux-fsdevel@vger.kernel.org>
CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
@@ -45,22 +45,22 @@ CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
<linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
<dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 04/10] fs/ntfs3: Add file operations and implementation
-Date: Fri, 4 Dec 2020 18:45:54 +0300
-Message-ID: <20201204154600.1546096-5-almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 04/10] fs/ntfs3: Add file operations and implementation
+Date: Fri, 25 Dec 2020 16:51:13 +0300
+Message-ID: <20201225135119.3666763-5-almaz.alexandrovich@paragon-software.com>
X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
MIME-Version: 1.0
+Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 8bit
-Content-Type: text/plain
X-Originating-IP: [172.30.114.105]
X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
vdlg-exch-02.paragon-software.com (172.30.1.105)
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-5-almaz.alexandrovich@paragon-software.com/>
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-5-almaz.alexandrovich@paragon-software.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
@@ -69,12 +69,12 @@ This adds file operations and implementation
Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
---
fs/ntfs3/dir.c | 575 +++++++++
- fs/ntfs3/file.c | 1093 ++++++++++++++++
- fs/ntfs3/frecord.c | 3085 ++++++++++++++++++++++++++++++++++++++++++++
+ fs/ntfs3/file.c | 1140 ++++++++++++++++
+ fs/ntfs3/frecord.c | 3088 ++++++++++++++++++++++++++++++++++++++++++++
fs/ntfs3/namei.c | 590 +++++++++
fs/ntfs3/record.c | 614 +++++++++
- fs/ntfs3/run.c | 1192 +++++++++++++++++
- 6 files changed, 7149 insertions(+)
+ fs/ntfs3/run.c | 1254 ++++++++++++++++++
+ 6 files changed, 7261 insertions(+)
create mode 100644 fs/ntfs3/dir.c
create mode 100644 fs/ntfs3/file.c
create mode 100644 fs/ntfs3/frecord.c
@@ -665,10 +665,10 @@ index 000000000000..f1df3f9ead9c
+};
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
new file mode 100644
-index 000000000000..03e8eecc58b9
+index 000000000000..a65ca1673e73
--- /dev/null
+++ b/fs/ntfs3/file.c
-@@ -0,0 +1,1093 @@
+@@ -0,0 +1,1140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
@@ -1021,8 +1021,8 @@ index 000000000000..03e8eecc58b9
+ return generic_file_fsync(filp, start, end, datasync);
+}
+
-+static int ntfs_extend_ex(struct inode *inode, loff_t pos, size_t count,
-+ struct file *file)
++static int ntfs_extend(struct inode *inode, loff_t pos, size_t count,
++ struct file *file)
+{
+ struct ntfs_inode *ni = ntfs_i(inode);
+ struct address_space *mapping = inode->i_mapping;
@@ -1072,6 +1072,63 @@ index 000000000000..03e8eecc58b9
+ return err;
+}
+
++static int ntfs_truncate(struct inode *inode, loff_t new_size)
++{
++ struct super_block *sb = inode->i_sb;
++ struct ntfs_sb_info *sbi = sb->s_fs_info;
++ struct ntfs_inode *ni = ntfs_i(inode);
++ int err, dirty = 0;
++ u32 vcn;
++ u64 new_valid;
++
++ if (!S_ISREG(inode->i_mode))
++ return 0;
++
++ if (is_compressed(ni)) {
++ if (ni->i_valid > new_size)
++ ni->i_valid = new_size;
++ } else {
++ err = block_truncate_page(inode->i_mapping, new_size,
++ ntfs_get_block);
++ if (err)
++ return err;
++ }
++
++ vcn = bytes_to_cluster(sbi, new_size);
++ new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size));
++
++ ni_lock(ni);
++
++ truncate_setsize(inode, new_size);
++
++ down_write(&ni->file.run_lock);
++ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
++ &new_valid, true, NULL);
++ up_write(&ni->file.run_lock);
++
++ if (new_valid < ni->i_valid)
++ ni->i_valid = new_valid;
++
++ ni_unlock(ni);
++
++ ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
++ inode->i_ctime = inode->i_mtime = current_time(inode);
++ if (!IS_DIRSYNC(inode)) {
++ dirty = 1;
++ } else {
++ err = ntfs_sync_inode(inode);
++ if (err)
++ return err;
++ }
++
++ if (dirty)
++ mark_inode_dirty(inode);
++
++ /*ntfs_flush_inodes(inode->i_sb, inode, NULL);*/
++
++ return 0;
++}
++
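
Note the switch from min() to min_t(u64, ...) in ntfs_truncate above: ni->i_valid is u64 while the size is a signed loff_t, and the kernel's min() warns on mixed-type comparisons, so min_t() casts both sides explicitly. A user-space rendition of the helper:

    #include <stdio.h>

    /* like the kernel macro: cast both operands before comparing */
    #define min_t(type, x, y) \
        ((type)(x) < (type)(y) ? (type)(x) : (type)(y))

    int main(void)
    {
        unsigned long long i_valid = 4096;  /* u64 in the driver */
        long long new_size = 1024;          /* loff_t is signed  */

        printf("%llu\n", min_t(unsigned long long, i_valid, new_size));
        return 0;
    }
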
+/*
+ * Preallocate space for a file. This implements ntfs's fallocate file
+ * operation, which gets called from sys_fallocate system call. User
@@ -1085,8 +1142,9 @@ index 000000000000..03e8eecc58b9
+ struct super_block *sb = inode->i_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct ntfs_inode *ni = ntfs_i(inode);
++ loff_t end = vbo + len;
++ loff_t vbo_down = round_down(vbo, PAGE_SIZE);
+ loff_t i_size;
-+ loff_t end;
+ int err;
+
+ /* No support for dir */
@@ -1098,88 +1156,135 @@ index 000000000000..03e8eecc58b9
+ FALLOC_FL_COLLAPSE_RANGE))
+ return -EOPNOTSUPP;
+
++ ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
++
+ inode_lock(inode);
+ i_size = inode->i_size;
+
++ if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
++ /* should never be here, see ntfs_file_open */
++ err = -EOPNOTSUPP;
++ goto out;
++ }
++
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+ err = -EINVAL;
+ goto out;
+ }
-+ /*TODO: add support*/
-+ err = -EOPNOTSUPP;
-+ goto out;
-+ }
+
-+ if (mode & FALLOC_FL_COLLAPSE_RANGE) {
-+ if (mode & ~FALLOC_FL_COLLAPSE_RANGE) {
-+ err = -EINVAL;
++ if (!is_sparsed(ni) && !is_compressed(ni)) {
++ ntfs_inode_warn(
++ inode,
++ "punch_hole only for sparsed/compressed files");
++ err = -EOPNOTSUPP;
+ goto out;
+ }
+
-+ /*TODO: add support*/
-+ err = -EOPNOTSUPP;
-+ goto out;
-+ }
++ err = filemap_write_and_wait_range(inode->i_mapping, vbo,
++ end - 1);
++ if (err)
++ goto out;
+
-+ end = vbo + len;
++ err = filemap_write_and_wait_range(inode->i_mapping, end,
++ LLONG_MAX);
++ if (err)
++ goto out;
+
-+ ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
++ truncate_pagecache(inode, vbo_down);
+
-+ /*
-+ * normal file: allocate clusters, do not change 'valid' size
-+ */
-+ err = ntfs_set_size(inode, max(end, i_size));
-+ if (err)
-+ goto out;
++ ni_lock(ni);
++ err = attr_punch_hole(ni, vbo, len);
++ ni_unlock(ni);
++ } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
++ if (mode & ~FALLOC_FL_COLLAPSE_RANGE) {
++ err = -EINVAL;
++ goto out;
++ }
+
-+ if (is_sparsed(ni) || is_compressed(ni)) {
-+ CLST vcn_v = ni->i_valid >> sbi->cluster_bits;
-+ CLST vcn = vbo >> sbi->cluster_bits;
-+ CLST cend = bytes_to_cluster(sbi, end);
-+ CLST lcn, clen;
-+ bool new;
++ /*
++ * Write tail of the last page before removed range since
++ * it will get removed from the page cache below.
++ */
++ err = filemap_write_and_wait_range(inode->i_mapping, vbo_down,
++ vbo);
++ if (err)
++ goto out;
+
+ /*
-+ * allocate but not zero new clusters (see below comments)
-+ * this breaks security (one can read unused on-disk areas)
-+ * zeroing these clusters may be too long
-+ * may be we should check here for root rights?
++ * Write data that will be shifted to preserve them
++ * when discarding page cache below
+ */
-+ for (; vcn < cend; vcn += clen) {
-+ err = attr_data_get_block(ni, vcn, cend - vcn, &lcn,
-+ &clen, &new);
-+ if (err)
-+ goto out;
-+ if (!new || vcn >= vcn_v)
-+ continue;
++ err = filemap_write_and_wait_range(inode->i_mapping, end,
++ LLONG_MAX);
++ if (err)
++ goto out;
+
-+ /*
-+ * This variant zeroes new allocated clusters inside valid size
-+ * Dangerous in case:
-+ * 1G of sparsed clusters + 1 cluster of data =>
-+ * valid_size == 1G + 1 cluster
-+ * fallocate(1G) will zero 1G and this can be very long
-+ */
-+ /*ntfs_sparse_cluster(inode, NULL, vcn,
-+ * min(vcn_v - vcn, clen));
-+ */
-+ }
-+ }
++ truncate_pagecache(inode, vbo_down);
+
-+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ ni_lock(ni);
-+ /*true - keep preallocated*/
-+ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
-+ i_size, &ni->i_valid, true, NULL);
++ err = attr_collapse_range(ni, vbo, len);
+ ni_unlock(ni);
++ } else {
++ /*
++ * normal file: allocate clusters, do not change 'valid' size
++ */
++ err = ntfs_set_size(inode, max(end, i_size));
+ if (err)
+ goto out;
-+ }
+
-+ inode->i_ctime = inode->i_mtime = current_time(inode);
-+ mark_inode_dirty(inode);
++ if (is_sparsed(ni) || is_compressed(ni)) {
++ CLST vcn_v = ni->i_valid >> sbi->cluster_bits;
++ CLST vcn = vbo >> sbi->cluster_bits;
++ CLST cend = bytes_to_cluster(sbi, end);
++ CLST lcn, clen;
++ bool new;
++
++ /*
++ * allocate but not zero new clusters (see below comments)
++ * this breaks security (one can read unused on-disk areas)
++ * zeroing these clusters may be too long
++ * maybe we should check here for root rights?
++ */
++ for (; vcn < cend; vcn += clen) {
++ err = attr_data_get_block(ni, vcn, cend - vcn,
++ &lcn, &clen, &new);
++ if (err)
++ goto out;
++ if (!new || vcn >= vcn_v)
++ continue;
++
++ /*
++ * Unwritten area
++ * NTFS is not able to store several unwritten areas
++ * Activate 'ntfs_sparse_cluster' to zero newly allocated clusters
++ *
++ * Dangerous in case:
++ * 1G of sparsed clusters + 1 cluster of data =>
++ * valid_size == 1G + 1 cluster
++ * fallocate(1G) will zero 1G and this can be very long
++ * xfstest 016/086 will fail without 'ntfs_sparse_cluster'
++ */
++ /*ntfs_sparse_cluster(inode, NULL, vcn,
++ * min(vcn_v - vcn, clen));
++ */
++ }
++ }
++
++ if (mode & FALLOC_FL_KEEP_SIZE) {
++ ni_lock(ni);
++ /*true - keep preallocated*/
++ err = attr_set_size(ni, ATTR_DATA, NULL, 0,
++ &ni->file.run, i_size, &ni->i_valid,
++ true, NULL);
++ ni_unlock(ni);
++ }
++ }
+
++ if (!err) {
++ inode->i_ctime = inode->i_mtime = current_time(inode);
++ mark_inode_dirty(inode);
++ }
+out:
+ if (err == -EFBIG)
+ err = -ENOSPC;
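
The rewritten hunk wires FALLOC_FL_PUNCH_HOLE and FALLOC_FL_COLLAPSE_RANGE through to attr_punch_hole()/attr_collapse_range(); as the mode checks above enforce, userspace must pair PUNCH_HOLE with KEEP_SIZE. A minimal caller exercising the new path:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <linux/falloc.h>

    int main(void)
    {
        int fd = open("test.bin", O_RDWR | O_CREAT, 0644);

        if (fd < 0 || ftruncate(fd, 1 << 20))   /* 1 MiB test file */
            return 1;
        /* punch 128K at offset 64K; KEEP_SIZE is mandatory here */
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      64 * 1024, 128 * 1024))
            perror("fallocate");
        close(fd);
        return 0;
    }
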
@@ -1188,55 +1293,10 @@ index 000000000000..03e8eecc58b9
+ return err;
+}
+
-+void ntfs_truncate_blocks(struct inode *inode, loff_t new_size)
-+{
-+ struct super_block *sb = inode->i_sb;
-+ struct ntfs_sb_info *sbi = sb->s_fs_info;
-+ struct ntfs_inode *ni = ntfs_i(inode);
-+ int err, dirty = 0;
-+ u32 vcn;
-+ u64 new_valid;
-+
-+ if (!S_ISREG(inode->i_mode))
-+ return;
-+
-+ vcn = bytes_to_cluster(sbi, new_size);
-+ new_valid = ntfs_up_block(sb, min(ni->i_valid, new_size));
-+
-+ ni_lock(ni);
-+
-+ truncate_setsize(inode, new_size);
-+
-+ down_write(&ni->file.run_lock);
-+ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
-+ &new_valid, true, NULL);
-+ up_write(&ni->file.run_lock);
-+
-+ if (new_valid < ni->i_valid)
-+ ni->i_valid = new_valid;
-+
-+ ni_unlock(ni);
-+
-+ ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
-+ inode->i_ctime = inode->i_mtime = current_time(inode);
-+ if (!IS_DIRSYNC(inode)) {
-+ dirty = 1;
-+ } else {
-+ err = ntfs_sync_inode(inode);
-+ if (err)
-+ return;
-+ }
-+
-+ if (dirty)
-+ mark_inode_dirty(inode);
-+
-+ /*ntfs_flush_inodes(inode->i_sb, inode, NULL);*/
-+}
-+
+/*
+ * inode_operations::setattr
+ */
-+int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
++int ntfs3_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
@@ -1261,38 +1321,20 @@ index 000000000000..03e8eecc58b9
+ if (ia_valid & ATTR_SIZE) {
+ loff_t oldsize = inode->i_size;
+
-+ if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) {
-+#ifdef CONFIG_NTFS3_LZX_XPRESS
-+ err = ni_decompress_file(ni);
-+ if (err)
-+ goto out;
-+#else
-+ ntfs_inode_warn(
-+ inode,
-+ "activate CONFIG_NTFS3_LZX_XPRESS to truncate external compressed files");
++ if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
++ /* should never be here, see ntfs_file_open */
+ err = -EOPNOTSUPP;
+ goto out;
-+#endif
+ }
+ inode_dio_wait(inode);
+
-+ if (attr->ia_size < oldsize) {
-+ if (is_compressed(ni)) {
-+ if (ni->i_valid > attr->ia_size)
-+ ni->i_valid = attr->ia_size;
-+ } else {
-+ err = block_truncate_page(inode->i_mapping,
-+ attr->ia_size,
-+ ntfs_get_block);
-+ if (err)
-+ goto out;
-+ }
-+ ntfs_truncate_blocks(inode, attr->ia_size);
-+ } else if (attr->ia_size > oldsize) {
-+ err = ntfs_extend_ex(inode, attr->ia_size, 0, NULL);
-+ if (err)
-+ goto out;
-+ }
++ if (attr->ia_size < oldsize)
++ err = ntfs_truncate(inode, attr->ia_size);
++ else if (attr->ia_size > oldsize)
++ err = ntfs_extend(inode, attr->ia_size, 0, NULL);
++
++ if (err)
++ goto out;
+
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+ }
@@ -1645,24 +1687,13 @@ index 000000000000..03e8eecc58b9
+ if (ret <= 0)
+ goto out;
+
-+ if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) {
-+#ifdef CONFIG_NTFS3_LZX_XPRESS
-+ int err = ni_decompress_file(ni);
-+
-+ if (err) {
-+ ret = err;
-+ goto out;
-+ }
-+#else
-+ ntfs_inode_warn(
-+ inode,
-+ "activate CONFIG_NTFS3_LZX_XPRESS to read external compressed files");
++ if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
++ /* should never be here, see ntfs_file_open */
+ ret = -EOPNOTSUPP;
+ goto out;
-+#endif
+ }
+
-+ ret = ntfs_extend_ex(inode, iocb->ki_pos, ret, file);
++ ret = ntfs_extend(inode, iocb->ki_pos, ret, file);
+ if (ret)
+ goto out;
+
@@ -1690,6 +1721,22 @@ index 000000000000..03e8eecc58b9
+ return -EOPNOTSUPP;
+ }
+
++ /* Decompress "external compressed" file if opened for rw */
++ if ((ni->ni_flags & NI_FLAG_COMPRESSED_MASK) &&
++ (file->f_flags & (O_WRONLY | O_RDWR | O_TRUNC))) {
++#ifdef CONFIG_NTFS3_LZX_XPRESS
++ int err = ni_decompress_file(ni);
++
++ if (err)
++ return err;
++#else
++ ntfs_inode_warn(
++ inode,
++ "activate CONFIG_NTFS3_LZX_XPRESS to write external compressed files");
++ return -EOPNOTSUPP;
++#endif
++ }
++
+ return generic_file_open(inode, file);
+}
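
ntfs_file_open() now front-loads the decompression of "external compressed" files, which is why the write paths above can simply WARN on NI_FLAG_COMPRESSED_MASK instead of decompressing lazily. The flag test works because O_RDONLY is 0, so every write-capable mode sets O_WRONLY or O_RDWR; a sketch of the same predicate:

    #include <stdbool.h>
    #include <fcntl.h>
    #include <stdio.h>

    /* mirrors the driver's check: any open that can modify the file */
    static bool open_wants_write(int flags)
    {
        return (flags & (O_WRONLY | O_RDWR | O_TRUNC)) != 0;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               open_wants_write(O_RDONLY),             /* 0 */
               open_wants_write(O_RDWR),               /* 1 */
               open_wants_write(O_RDONLY | O_TRUNC));  /* 1 */
        return 0;
    }
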
+
@@ -1738,7 +1785,7 @@ index 000000000000..03e8eecc58b9
+
+const struct inode_operations ntfs_file_inode_operations = {
+ .getattr = ntfs_getattr,
-+ .setattr = ntfs_setattr,
++ .setattr = ntfs3_setattr,
+ .listxattr = ntfs_listxattr,
+ .permission = ntfs_permission,
+ .get_acl = ntfs_get_acl,
@@ -1764,10 +1811,10 @@ index 000000000000..03e8eecc58b9
+};
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
new file mode 100644
-index 000000000000..4ebb80d012ef
+index 000000000000..f9e116d8e588
--- /dev/null
+++ b/fs/ntfs3/frecord.c
-@@ -0,0 +1,3085 @@
+@@ -0,0 +1,3088 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
@@ -1780,10 +1827,14 @@ index 000000000000..4ebb80d012ef
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
++#include <linux/vmalloc.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
++#ifdef CONFIG_NTFS3_LZX_XPRESS
++#include "lib/lib.h"
++#endif
+
+static inline void get_mi_ref(const struct mft_inode *mi, struct MFT_REF *ref)
+{
@@ -2040,14 +2091,17 @@ index 000000000000..4ebb80d012ef
+ * enumerates attributes in ntfs_inode
+ */
+struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr,
-+ struct ATTR_LIST_ENTRY **le)
++ struct ATTR_LIST_ENTRY **le,
++ struct mft_inode **mi)
+{
-+ struct mft_inode *mi;
++ struct mft_inode *mi2;
+ struct ATTR_LIST_ENTRY *le2;
+
+ /* Do we have an attribute list? */
+ if (!ni->attr_list.size) {
+ *le = NULL;
++ if (mi)
++ *mi = &ni->mi;
+ /* Enum attributes in primary record */
+ return mi_enum_attr(&ni->mi, attr);
+ }
@@ -2058,12 +2112,14 @@ index 000000000000..4ebb80d012ef
+ return NULL;
+
+ /* Load record that contains the required attribute */
-+ if (ni_load_mi(ni, le2, &mi))
++ if (ni_load_mi(ni, le2, &mi2))
+ return NULL;
+
++ if (mi)
++ *mi = mi2;
++
+ /* Find attribute in loaded record */
-+ attr = rec_find_attr_le(mi, le2);
-+ return attr;
++ return rec_find_attr_le(mi2, le2);
+}
+
+/*
@@ -2327,15 +2383,10 @@ index 000000000000..4ebb80d012ef
+
+ run_init(&run);
+
-+ while ((attr = ni_enum_attr_ex(ni, attr, &le))) {
++ while ((attr = ni_enum_attr_ex(ni, attr, &le, &mi))) {
+ if (!attr->non_res)
+ continue;
+
-+ if (ni_load_mi(ni, le, &mi)) {
-+ err = -EINVAL;
-+ break;
-+ }
-+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ if (svcn != le64_to_cpu(le->vcn)) {
+ err = -EINVAL;
@@ -3307,7 +3358,7 @@ index 000000000000..4ebb80d012ef
+ bool nt3 = is_ntfs3(sbi);
+ struct MFT_REF ref;
+
-+ while ((attr = ni_enum_attr_ex(ni, attr, &le))) {
++ while ((attr = ni_enum_attr_ex(ni, attr, &le, NULL))) {
+ if (!nt3 || attr->name_len) {
+ ;
+ } else if (attr->type == ATTR_REPARSE) {
@@ -3558,6 +3609,17 @@ index 000000000000..4ebb80d012ef
+ case IO_REPARSE_TAG_SYMLINK:
+ break;
+ case IO_REPARSE_TAG_COMPRESS:
++ /*
++ * WOF - Windows Overlay Filter - used to compress files with lzx/xpress
++ * Unlike native NTFS file compression, the Windows Overlay Filter supports
++ * only read operations. This means that it doesn’t need to sector-align each
++ * compressed chunk, so the compressed data can be packed more tightly together.
++ * If you open the file for writing, the Windows Overlay Filter just decompresses
++ * the entire file, turning it back into a plain file.
++ *
++ * ntfs3 driver decompresses the entire file only on write or size-change requests
++ */
++
+ cmpr = &rp->CompressReparseBuffer;
+ if (len < sizeof(*cmpr) ||
+ cmpr->WofVersion != WOF_CURRENT_VERSION ||
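
The comment's point about alignment is the whole space win of WOF: native NTFS compression must keep every chunk independently rewritable, so each compressed chunk is padded out to cluster boundaries, while read-only WOF chunks can be packed back to back and addressed through an offset table. A toy comparison (chunk sizes invented for illustration):

    #include <stdio.h>

    static unsigned long pad_to(unsigned long sz, unsigned long cluster)
    {
        return (sz + cluster - 1) / cluster * cluster;
    }

    int main(void)
    {
        unsigned long chunks[] = { 3172, 2861, 4090 };
        unsigned long cluster = 4096, packed = 0, aligned = 0;

        for (int i = 0; i < 3; i++) {
            packed += chunks[i];                    /* WOF: back to back  */
            aligned += pad_to(chunks[i], cluster);  /* native: padded out */
        }
        printf("packed=%lu aligned=%lu\n", packed, aligned);
        return 0;
    }
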
@@ -3979,7 +4041,7 @@ index 000000000000..4ebb80d012ef
+ */
+ attr = NULL;
+ le = NULL;
-+ while ((attr = ni_enum_attr_ex(ni, attr, &le))) {
++ while ((attr = ni_enum_attr_ex(ni, attr, &le, NULL))) {
+ CLST svcn, evcn;
+ u32 asize, roff;
+
@@ -4091,57 +4153,49 @@ index 000000000000..4ebb80d012ef
+ }
+
+ err = 0;
-+ ctx = NULL;
-+ spin_lock(&sbi->compress.lock);
+ if (frame_size == 0x8000) {
++ mutex_lock(&sbi->compress.mtx_lzx);
+ /* LZX: frame compressed */
-+ if (!sbi->compress.lzx) {
++ ctx = sbi->compress.lzx;
++ if (!ctx) {
+ /* Lazy initialize lzx decompress context */
-+ spin_unlock(&sbi->compress.lock);
-+ ctx = lzx_allocate_decompressor(0x8000);
-+ if (!ctx)
-+ return -ENOMEM;
-+ if (IS_ERR(ctx)) {
-+ /* should never failed */
-+ err = PTR_ERR(ctx);
-+ goto out;
++ ctx = lzx_allocate_decompressor();
++ if (!ctx) {
++ err = -ENOMEM;
++ goto out1;
+ }
+
-+ spin_lock(&sbi->compress.lock);
-+ if (!sbi->compress.lzx) {
-+ sbi->compress.lzx = ctx;
-+ ctx = NULL;
-+ }
++ sbi->compress.lzx = ctx;
+ }
+
-+ if (lzx_decompress(sbi->compress.lzx, cmpr, cmpr_size, unc,
-+ unc_size)) {
++ if (lzx_decompress(ctx, cmpr, cmpr_size, unc, unc_size)) {
++ /* treat all errors as "invalid argument" */
+ err = -EINVAL;
+ }
++out1:
++ mutex_unlock(&sbi->compress.mtx_lzx);
+ } else {
+ /* XPRESS: frame compressed */
-+ if (!sbi->compress.xpress) {
++ mutex_lock(&sbi->compress.mtx_xpress);
++ ctx = sbi->compress.xpress;
++ if (!ctx) {
+ /* Lazy initialize xpress decompress context */
-+ spin_unlock(&sbi->compress.lock);
+ ctx = xpress_allocate_decompressor();
-+ if (!ctx)
-+ return -ENOMEM;
-+
-+ spin_lock(&sbi->compress.lock);
-+ if (!sbi->compress.xpress) {
-+ sbi->compress.xpress = ctx;
-+ ctx = NULL;
++ if (!ctx) {
++ err = -ENOMEM;
++ goto out2;
+ }
++
++ sbi->compress.xpress = ctx;
+ }
+
-+ if (xpress_decompress(sbi->compress.xpress, cmpr, cmpr_size,
-+ unc, unc_size)) {
++ if (xpress_decompress(ctx, cmpr, cmpr_size, unc, unc_size)) {
++ /* treat all errors as "invalid argument" */
+ err = -EINVAL;
+ }
++out2:
++ mutex_unlock(&sbi->compress.mtx_xpress);
+ }
-+ spin_unlock(&sbi->compress.lock);
-+out:
-+ ntfs_free(ctx);
+ return err;
+}
+#endif
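
v16 drops the spinlock-based double-checked allocation in favor of one mutex per algorithm: the context is created lazily under the lock, and the same lock serializes each decompress call. That is simpler, at the cost of one frame in flight per algorithm. A pthread sketch of the pattern:

    #include <pthread.h>
    #include <stddef.h>
    #include <stdlib.h>

    struct decompressor { int state; };    /* stand-in for lzx state */

    static pthread_mutex_t lzx_mtx = PTHREAD_MUTEX_INITIALIZER;
    static struct decompressor *lzx_ctx;   /* allocated on first use */

    static int decompress_frame(const void *in, size_t in_sz,
                                void *out, size_t out_sz)
    {
        int err = 0;

        pthread_mutex_lock(&lzx_mtx);
        if (!lzx_ctx) {
            lzx_ctx = calloc(1, sizeof(*lzx_ctx));  /* lazy, under lock */
            if (!lzx_ctx) {
                err = -1;   /* -ENOMEM in the driver */
                goto out;
            }
        }
        /* ... lzx_decompress(lzx_ctx, in, in_sz, out, out_sz) ... */
        (void)in; (void)in_sz; (void)out; (void)out_sz;
    out:
        pthread_mutex_unlock(&lzx_mtx);
        return err;
    }
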
@@ -4460,8 +4514,7 @@ index 000000000000..4ebb80d012ef
+ goto out;
+ }
+
-+ if (!is_attr_compressed(attr)) {
-+ WARN_ON(1);
++ if (WARN_ON(!is_attr_compressed(attr))) {
+ err = -EINVAL;
+ goto out;
+ }
@@ -4522,10 +4575,9 @@ index 000000000000..4ebb80d012ef
+ goto out2;
+ }
+
-+ spin_lock(&sbi->compress.lock);
++ mutex_lock(&sbi->compress.mtx_lznt);
+ lznt = NULL;
+ if (!sbi->compress.lznt) {
-+ spin_unlock(&sbi->compress.lock);
+ /*
+ * lznt implements two levels of compression:
+ * 0 - standard compression
@@ -4534,21 +4586,19 @@ index 000000000000..4ebb80d012ef
+ */
+ lznt = get_lznt_ctx(0);
+ if (!lznt) {
++ mutex_unlock(&sbi->compress.mtx_lznt);
+ err = -ENOMEM;
+ goto out3;
+ }
+
-+ spin_lock(&sbi->compress.lock);
-+ if (!sbi->compress.lznt) {
-+ sbi->compress.lznt = lznt;
-+ lznt = NULL;
-+ }
++ sbi->compress.lznt = lznt;
++ lznt = NULL;
+ }
+
+ /* compress: frame_mem -> frame_ondisk */
+ compr_size = compress_lznt(frame_mem, frame_size, frame_ondisk,
+ frame_size, sbi->compress.lznt);
-+ spin_unlock(&sbi->compress.lock);
++ mutex_unlock(&sbi->compress.mtx_lznt);
+ ntfs_free(lznt);
+
+ if (compr_size + sbi->cluster_size > frame_size) {
@@ -4855,7 +4905,7 @@ index 000000000000..4ebb80d012ef
+}
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
new file mode 100644
-index 000000000000..e41d09dd419d
+index 000000000000..783a2a6598f5
--- /dev/null
+++ b/fs/ntfs3/namei.c
@@ -0,0 +1,590 @@
@@ -5409,7 +5459,7 @@ index 000000000000..e41d09dd419d
+ return err;
+}
+
-+struct dentry *ntfs_get_parent(struct dentry *child)
++struct dentry *ntfs3_get_parent(struct dentry *child)
+{
+ struct inode *inode = d_inode(child);
+ struct ntfs_inode *ni = ntfs_i(inode);
@@ -5443,7 +5493,7 @@ index 000000000000..e41d09dd419d
+ .permission = ntfs_permission,
+ .get_acl = ntfs_get_acl,
+ .set_acl = ntfs_set_acl,
-+ .setattr = ntfs_setattr,
++ .setattr = ntfs3_setattr,
+ .getattr = ntfs_getattr,
+ .listxattr = ntfs_listxattr,
+ .atomic_open = ntfs_atomic_open,
@@ -6071,10 +6121,10 @@ index 000000000000..cca71ca82657
+}
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
new file mode 100644
-index 000000000000..32d43ecb58fd
+index 000000000000..2b1eeb980554
--- /dev/null
+++ b/fs/ntfs3/run.c
-@@ -0,0 +1,1192 @@
+@@ -0,0 +1,1254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
@@ -6558,6 +6608,68 @@ index 000000000000..32d43ecb58fd
+ return true;
+}
+
++/* helper for attr_collapse_range, which is a helper for fallocate(collapse_range) */
++bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len)
++{
++ size_t index, eat;
++ struct ntfs_run *r, *e, *eat_start, *eat_end;
++ CLST end;
++
++ if (WARN_ON(!run_lookup(run, vcn, &index)))
++ return true; /* should never be here */
++
++ e = run->runs_ + run->count;
++ r = run->runs_ + index;
++ end = vcn + len;
++
++ if (vcn > r->vcn) {
++ if (r->vcn + r->len <= end) {
++ /* collapse tail of run */
++ r->len = vcn - r->vcn;
++ } else if (r->lcn == SPARSE_LCN) {
++ /* collapse a middle part of sparsed run */
++ r->len -= len;
++ } else {
++ /* collapse a middle part of normal run, split */
++ if (!run_add_entry(run, vcn, SPARSE_LCN, len, false))
++ return false;
++ return run_collapse_range(run, vcn, len);
++ }
++
++ r += 1;
++ }
++
++ eat_start = r;
++ eat_end = r;
++
++ for (; r < e; r++) {
++ CLST d;
++
++ if (r->vcn >= end) {
++ r->vcn -= len;
++ continue;
++ }
++
++ if (r->vcn + r->len <= end) {
++ /* eat this run */
++ eat_end = r + 1;
++ continue;
++ }
++
++ d = end - r->vcn;
++ if (r->lcn != SPARSE_LCN)
++ r->lcn += d;
++ r->len -= d;
++ r->vcn -= len - d;
++ }
++
++ eat = eat_end - eat_start;
++ memmove(eat_start, eat_end, (e - eat_end) * sizeof(*r));
++ run->count -= eat;
++
++ return true;
++}
++
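
A worked pass of run_collapse_range() over three 4-cluster runs, collapsing vcn=2, len=4 (so end=6): the first run's tail is clipped, the sparse run loses its overlap with the removed range and shifts by the remainder, and the run entirely past the range shifts whole; total mapped length drops from 12 to 8 clusters.

    before: (vcn=0, lcn=100, len=4) (vcn=4, SPARSE, len=4) (vcn=8, lcn=200, len=4)
    after:  (vcn=0, lcn=100, len=2) (vcn=2, SPARSE, len=2) (vcn=4, lcn=200, len=4)
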
+/*
+ * run_get_entry
+ *
diff --git a/PATCH-v14-05-10-fs-ntfs3-Add-attrib-operations.patch b/PATCH-v16-05-10-fs-ntfs3-Add-attrib-operations.patch
similarity index 86%
rename from PATCH-v14-05-10-fs-ntfs3-Add-attrib-operations.patch
rename to PATCH-v16-05-10-fs-ntfs3-Add-attrib-operations.patch
index aca81f5..1aaadff 100644
--- a/PATCH-v14-05-10-fs-ntfs3-Add-attrib-operations.patch
+++ b/PATCH-v16-05-10-fs-ntfs3-Add-attrib-operations.patch
@@ -8,34 +8,34 @@ X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id 9B14FC2BB3F
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:26 +0000 (UTC)
+ by smtp.lore.kernel.org (Postfix) with ESMTP id 3292DC43381
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:26 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id 74F5622C97
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:26 +0000 (UTC)
+ by mail.kernel.org (Postfix) with ESMTP id DEFCB233E2
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:25 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730863AbgLDPtM (ORCPT
+ id S1727936AbgLYNyS (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:49:12 -0500
-Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:37126 "EHLO
- relaydlg-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730723AbgLDPtE (ORCPT
+ Fri, 25 Dec 2020 08:54:18 -0500
+Received: from relayfre-01.paragon-software.com ([176.12.100.13]:47292 "EHLO
+ relayfre-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
+ by vger.kernel.org with ESMTP id S1726259AbgLYNyM (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:49:04 -0500
+ Fri, 25 Dec 2020 08:54:12 -0500
Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id C833282246;
- Fri, 4 Dec 2020 18:48:07 +0300 (MSK)
+ by relayfre-01.paragon-software.com (Postfix) with ESMTPS id 6FA541D70;
+ Fri, 25 Dec 2020 16:53:27 +0300 (MSK)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096887;
- bh=Mb60JkeMavnyjfk/pXYtNu2SmXwwz9TqXJFhSIz0n0Y=;
+ d=paragon-software.com; s=mail; t=1608904407;
+ bh=kaFxxQCzpA7g331MwhWa15TjMkvMFdTnqdL+IrCqhn4=;
h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=E0iRQ0qxFWDosE7qiO+Auaq12bFiWsu3dL65e4qcfHqxu4Z0PrulvN7CQhhMT8+V9
- MjgDrOBU3UR3p7nBQ1jZ2hJ+eIdhI2PA5OgPahYWw1GrAL74FJP1LE0088U61mAikB
- nzodw5epZltQfRgZnT/1B+eg1s4qgGG+YNfoXGrQ=
+ b=fn8U3Y1FFUHsu8vR0Elt5saEyqFJ6/YRZUg6HVm8v52eM6MAurJYCBK+2S5c0C3u7
+ FJt5JHuG3GfFth6/ujfNQa0JJ7fYgsi/NiRA2qS+j7JZEppIkyyuIIdswjxezL/V0X
+ T/Yg4AwS+wS7cc792Sh/NnBBQFCrjLJEAD9kLsSE=
Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:07 +0300
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:26 +0300
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
To: <linux-fsdevel@vger.kernel.org>
CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
@@ -45,12 +45,12 @@ CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
<linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
<dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 05/10] fs/ntfs3: Add attrib operations
-Date: Fri, 4 Dec 2020 18:45:55 +0300
-Message-ID: <20201204154600.1546096-6-almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 05/10] fs/ntfs3: Add attrib operations
+Date: Fri, 25 Dec 2020 16:51:14 +0300
+Message-ID: <20201225135119.3666763-6-almaz.alexandrovich@paragon-software.com>
X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
@@ -60,7 +60,7 @@ X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-6-almaz.alexandrovich@paragon-software.com/>
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-6-almaz.alexandrovich@paragon-software.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
@@ -68,20 +68,20 @@ This adds attrib operations
Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
---
- fs/ntfs3/attrib.c | 1682 +++++++++++++++++++++++++++++++++++++++++++
- fs/ntfs3/attrlist.c | 463 ++++++++++++
- fs/ntfs3/xattr.c | 1073 +++++++++++++++++++++++++++
- 3 files changed, 3218 insertions(+)
+ fs/ntfs3/attrib.c | 2080 +++++++++++++++++++++++++++++++++++++++++++
+ fs/ntfs3/attrlist.c | 463 ++++++++++
+ fs/ntfs3/xattr.c | 1072 ++++++++++++++++++++++
+ 3 files changed, 3615 insertions(+)
create mode 100644 fs/ntfs3/attrib.c
create mode 100644 fs/ntfs3/attrlist.c
create mode 100644 fs/ntfs3/xattr.c
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
new file mode 100644
-index 000000000000..b6340181d68b
+index 000000000000..3f4c847c4962
--- /dev/null
+++ b/fs/ntfs3/attrib.c
-@@ -0,0 +1,1682 @@
+@@ -0,0 +1,2080 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
@@ -252,7 +252,7 @@ index 000000000000..b6340181d68b
+
+out:
+ if (done)
-+ *done = dn;
++ *done += dn;
+
+ return err;
+}
@@ -770,6 +770,7 @@ index 000000000000..b6340181d68b
+ vcn = max(svcn, new_alen);
+ new_alloc_tmp = (u64)vcn << cluster_bits;
+
++ alen = 0;
+ err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &alen,
+ true);
+ if (err)
@@ -1764,6 +1765,403 @@ index 000000000000..b6340181d68b
+
+ return err;
+}
++
++/* Collapse range in file */
++int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
++{
++ int err = 0;
++ struct runs_tree *run = &ni->file.run;
++ struct ntfs_sb_info *sbi = ni->mi.sbi;
++ struct ATTRIB *attr, *attr_b;
++ struct ATTR_LIST_ENTRY *le, *le_b;
++ struct mft_inode *mi, *mi_b;
++ CLST svcn, evcn1, len, dealloc, alen;
++ CLST vcn, end;
++ u64 valid_size, data_size, alloc_size, total_size;
++ u32 mask;
++ __le16 a_flags;
++
++ if (!bytes)
++ return 0;
++
++ le_b = NULL;
++ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
++ if (!attr_b)
++ return -ENOENT;
++
++ if (!attr_b->non_res) {
++ /* Attribute is resident. Nothing to do? */
++ return 0;
++ }
++
++ data_size = le64_to_cpu(attr_b->nres.data_size);
++ valid_size = le64_to_cpu(attr_b->nres.valid_size);
++ alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
++ a_flags = attr_b->flags;
++
++ if (is_attr_ext(attr_b)) {
++ total_size = le64_to_cpu(attr_b->nres.total_size);
++ mask = (1u << (attr_b->nres.c_unit + sbi->cluster_bits)) - 1;
++ } else {
++ total_size = alloc_size;
++ mask = sbi->cluster_mask;
++ }
++
++ if (vbo & mask)
++ return -EINVAL;
++
++ if (bytes & mask)
++ return -EINVAL;
++
++ if (vbo > data_size)
++ return -EINVAL;
++
++ down_write(&ni->file.run_lock);
++
++ if (vbo + bytes >= data_size) {
++ u64 new_valid = min(ni->i_valid, vbo);
++
++ /* Simple truncate file at 'vbo' */
++ truncate_setsize(&ni->vfs_inode, vbo);
++ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, vbo,
++ &new_valid, true, NULL);
++
++ if (!err && new_valid < ni->i_valid)
++ ni->i_valid = new_valid;
++
++ goto out;
++ }
++
++ /*
++ * Enumerate all attribute segments and collapse
++ */
++ alen = alloc_size >> sbi->cluster_bits;
++ vcn = vbo >> sbi->cluster_bits;
++ len = bytes >> sbi->cluster_bits;
++ end = vcn + len;
++ dealloc = 0;
++
++ svcn = le64_to_cpu(attr_b->nres.svcn);
++ evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
++
++ if (svcn <= vcn && vcn < evcn1) {
++ attr = attr_b;
++ le = le_b;
++ mi = mi_b;
++ } else if (!le_b) {
++ err = -EINVAL;
++ goto out;
++ } else {
++ le = le_b;
++ attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
++ &mi);
++ if (!attr) {
++ err = -EINVAL;
++ goto out;
++ }
++
++ svcn = le64_to_cpu(attr->nres.svcn);
++ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
++ }
++
++ for (;;) {
++ if (svcn >= end) {
++ /* shift vcn */
++ attr->nres.svcn = cpu_to_le64(svcn - len);
++ attr->nres.evcn = cpu_to_le64(evcn1 - 1 - len);
++ if (le) {
++ le->vcn = attr->nres.svcn;
++ ni->attr_list.dirty = true;
++ }
++ mi->dirty = true;
++ } else if (svcn < vcn || end < evcn1) {
++ CLST vcn1, eat, next_svcn;
++
++ /* collapse a part of this attribute segment */
++ err = attr_load_runs(attr, ni, run, &svcn);
++ if (err)
++ goto out;
++ vcn1 = max(vcn, svcn);
++ eat = min(end, evcn1) - vcn1;
++
++ err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc,
++ true);
++ if (err)
++ goto out;
++
++ if (!run_collapse_range(run, vcn1, eat)) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ if (svcn >= vcn) {
++ /* shift vcn */
++ attr->nres.svcn = cpu_to_le64(vcn);
++ if (le) {
++ le->vcn = attr->nres.svcn;
++ ni->attr_list.dirty = true;
++ }
++ }
++
++ err = mi_pack_runs(mi, attr, run, evcn1 - svcn - eat);
++ if (err)
++ goto out;
++
++ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
++ if (next_svcn + eat < evcn1) {
++ err = ni_insert_nonresident(
++ ni, ATTR_DATA, NULL, 0, run, next_svcn,
++ evcn1 - eat - next_svcn, a_flags, &attr,
++ &mi);
++ if (err)
++ goto out;
++
++ /* layout of records may have changed */
++ attr_b = NULL;
++ le = al_find_ex(ni, NULL, ATTR_DATA, NULL, 0,
++ &next_svcn);
++ if (!le) {
++ err = -EINVAL;
++ goto out;
++ }
++ }
++
++ /* free all allocated memory */
++ run_truncate(run, 0);
++ } else {
++ u16 le_sz;
++ u16 roff = le16_to_cpu(attr->nres.run_off);
++
++ /*run==1 means unpack and deallocate*/
++ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn,
++ evcn1 - 1, svcn, Add2Ptr(attr, roff),
++ le32_to_cpu(attr->size) - roff);
++
++ /* delete this attribute segment */
++ mi_remove_attr(mi, attr);
++ if (!le)
++ break;
++
++ le_sz = le16_to_cpu(le->size);
++ if (!al_remove_le(ni, le)) {
++ err = -EINVAL;
++ goto out;
++ }
++
++ if (evcn1 >= alen)
++ break;
++
++ if (!svcn) {
++ /* Load next record that contains this attribute */
++ if (ni_load_mi(ni, le, &mi)) {
++ err = -EINVAL;
++ goto out;
++ }
++
++ /* Look for required attribute */
++ attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL,
++ 0, &le->id);
++ if (!attr) {
++ err = -EINVAL;
++ goto out;
++ }
++ goto next_attr;
++ }
++ le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz);
++ }
++
++ if (evcn1 >= alen)
++ break;
++
++ attr = ni_enum_attr_ex(ni, attr, &le, &mi);
++ if (!attr) {
++ err = -EINVAL;
++ goto out;
++ }
++
++next_attr:
++ svcn = le64_to_cpu(attr->nres.svcn);
++ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
++ }
++
++ if (vbo + bytes <= valid_size)
++ valid_size -= bytes;
++ else if (vbo < valid_size)
++ valid_size = vbo;
++
++ if (!attr_b) {
++ le_b = NULL;
++ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL,
++ &mi_b);
++ if (!attr_b) {
++ err = -ENOENT;
++ goto out;
++ }
++ }
++
++ attr_b->nres.alloc_size = cpu_to_le64(alloc_size - bytes);
++ attr_b->nres.data_size = cpu_to_le64(data_size - bytes);
++ attr_b->nres.valid_size = cpu_to_le64(valid_size);
++ total_size -= (u64)dealloc << sbi->cluster_bits;
++ if (is_attr_ext(attr_b))
++ attr_b->nres.total_size = cpu_to_le64(total_size);
++ mi_b->dirty = true;
++
++ /*update inode size*/
++ ni->i_valid = valid_size;
++ ni->vfs_inode.i_size = data_size - bytes;
++ inode_set_bytes(&ni->vfs_inode, total_size);
++ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
++ mark_inode_dirty(&ni->vfs_inode);
++
++out:
++ up_write(&ni->file.run_lock);
++ if (err)
++ make_bad_inode(&ni->vfs_inode);
++
++ return err;
++}
++
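
The main loop of attr_collapse_range() classifies each attribute segment [svcn, evcn] against the collapsed cluster range [vcn, end): segments entirely after it only have their svcn/evcn (and attr-list vcn) shifted down by len; segments straddling an edge get the overlap deallocated, collapsed via run_collapse_range() and repacked; segments entirely inside are freed with RUN_DEALLOCATE and removed outright. The final bookkeeping then shrinks every size; for example, on a 4K-cluster volume:

    40K fully-allocated file, collapse vbo=8K, bytes=16K (cluster-aligned):
      data_size  40K -> 24K     alloc_size 40K -> 24K
      valid_size 40K -> 24K     total_size -= 16K (4 clusters freed)
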
++/* not for normal files */
++int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes)
++{
++ int err = 0;
++ struct runs_tree *run = &ni->file.run;
++ struct ntfs_sb_info *sbi = ni->mi.sbi;
++ struct ATTRIB *attr, *attr_b;
++ struct ATTR_LIST_ENTRY *le, *le_b;
++ struct mft_inode *mi, *mi_b;
++ CLST svcn, evcn1, vcn, len, end, alen, dealloc;
++ u64 total_size, alloc_size;
++
++ if (!bytes)
++ return 0;
++
++ le_b = NULL;
++ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
++ if (!attr_b)
++ return -ENOENT;
++
++ if (!attr_b->non_res) {
++ u32 data_size = le32_to_cpu(attr_b->res.data_size);
++ u32 from, to;
++
++ if (vbo > data_size)
++ return 0;
++
++ from = vbo;
++ to = (vbo + bytes) < data_size ? (vbo + bytes) : data_size;
++ memset(Add2Ptr(resident_data(attr_b), from), 0, to - from);
++ return 0;
++ }
++
++ /* TODO: add support for normal files too */
++ if (!is_attr_ext(attr_b))
++ return -EOPNOTSUPP;
++
++ alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
++ total_size = le64_to_cpu(attr_b->nres.total_size);
++
++ if (vbo >= alloc_size) {
++ // NOTE: it is allowed
++ return 0;
++ }
++
++ if (vbo + bytes > alloc_size)
++ bytes = alloc_size - vbo;
++
++ down_write(&ni->file.run_lock);
++ /*
++ * Enumerate all attribute segments and punch hole where necessary
++ */
++ alen = alloc_size >> sbi->cluster_bits;
++ vcn = vbo >> sbi->cluster_bits;
++ len = bytes >> sbi->cluster_bits;
++ end = vcn + len;
++ dealloc = 0;
++
++ svcn = le64_to_cpu(attr_b->nres.svcn);
++ evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
++
++ if (svcn <= vcn && vcn < evcn1) {
++ attr = attr_b;
++ le = le_b;
++ mi = mi_b;
++ } else if (!le_b) {
++ err = -EINVAL;
++ goto out;
++ } else {
++ le = le_b;
++ attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
++ &mi);
++ if (!attr) {
++ err = -EINVAL;
++ goto out;
++ }
++
++ svcn = le64_to_cpu(attr->nres.svcn);
++ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
++ }
++
++ while (svcn < end) {
++ CLST vcn1, zero, dealloc2;
++
++ err = attr_load_runs(attr, ni, run, &svcn);
++ if (err)
++ goto out;
++ vcn1 = max(vcn, svcn);
++ zero = min(end, evcn1) - vcn1;
++
++ dealloc2 = dealloc;
++ err = run_deallocate_ex(sbi, run, vcn1, zero, &dealloc, true);
++ if (err)
++ goto out;
++
++ if (dealloc2 == dealloc) {
++ /* looks like the required range is already sparsed */
++ } else {
++ if (!run_add_entry(run, vcn1, SPARSE_LCN, zero,
++ false)) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ err = mi_pack_runs(mi, attr, run, evcn1 - svcn);
++ if (err)
++ goto out;
++ }
++ /* free all allocated memory */
++ run_truncate(run, 0);
++
++ if (evcn1 >= alen)
++ break;
++
++ attr = ni_enum_attr_ex(ni, attr, &le, &mi);
++ if (!attr) {
++ err = -EINVAL;
++ goto out;
++ }
++
++ svcn = le64_to_cpu(attr->nres.svcn);
++ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
++ }
++
++ total_size -= (u64)dealloc << sbi->cluster_bits;
++ attr_b->nres.total_size = cpu_to_le64(total_size);
++ mi_b->dirty = true;
++
++ /* Update inode size. */
++ inode_set_bytes(&ni->vfs_inode, total_size);
++ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
++ mark_inode_dirty(&ni->vfs_inode);
++
++out:
++ up_write(&ni->file.run_lock);
++ if (err)
++ make_bad_inode(&ni->vfs_inode);
++
++ return err;
++}
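
The segment walk above operates on whole clusters, so the byte range is first
converted with cluster_bits. A minimal sketch of that conversion (assuming 4K
clusters, i.e. cluster_bits == 12; the example values are made up):

	/* Punching 0x2000 bytes at vbo 0x3000 targets virtual clusters [3, 5). */
	CLST vcn = 0x3000 >> 12;	/* 3: first cluster of the hole */
	CLST len = 0x2000 >> 12;	/* 2: number of clusters to punch */
	CLST end = vcn + len;		/* 5: first cluster past the hole */
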
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
new file mode 100644
index 000000000000..e94512259354
@@ -2235,10 +2633,10 @@ index 000000000000..e94512259354
+}
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
new file mode 100644
-index 000000000000..06eafeb0436f
+index 000000000000..86f95c166a62
--- /dev/null
+++ b/fs/ntfs3/xattr.c
-@@ -0,0 +1,1073 @@
+@@ -0,0 +1,1072 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
@@ -2713,6 +3111,7 @@ index 000000000000..06eafeb0436f
+ return err;
+}
+
++#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+static inline void ntfs_posix_acl_release(struct posix_acl *acl)
+{
+ if (acl && refcount_dec_and_test(&acl->a_refcount))
@@ -2911,14 +3310,71 @@ index 000000000000..06eafeb0436f
+}
+
+/*
++ * Initialize the ACLs of a new inode. Called from ntfs_create_inode.
++ */
++int ntfs_init_acl(struct inode *inode, struct inode *dir)
++{
++ struct posix_acl *default_acl, *acl;
++ int err;
++
++ /*
++ * TODO: refactor locking; the chain below takes ni_lock(dir) twice:
++ * ni_lock(dir) ... -> posix_acl_create(dir,...) -> ntfs_get_acl -> ni_lock(dir)
++ */
++ inode->i_default_acl = NULL;
++
++ default_acl = ntfs_get_acl_ex(dir, ACL_TYPE_DEFAULT, 1);
++
++ if (!default_acl || default_acl == ERR_PTR(-EOPNOTSUPP)) {
++ inode->i_mode &= ~current_umask();
++ err = 0;
++ goto out;
++ }
++
++ if (IS_ERR(default_acl)) {
++ err = PTR_ERR(default_acl);
++ goto out;
++ }
++
++ acl = default_acl;
++ err = __posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
++ if (err < 0)
++ goto out1;
++ if (!err) {
++ posix_acl_release(acl);
++ acl = NULL;
++ }
++
++ if (!S_ISDIR(inode->i_mode)) {
++ posix_acl_release(default_acl);
++ default_acl = NULL;
++ }
++
++ if (default_acl)
++ err = ntfs_set_acl_ex(inode, default_acl, ACL_TYPE_DEFAULT, 1);
++
++ if (!acl)
++ inode->i_acl = NULL;
++ else if (!err)
++ err = ntfs_set_acl_ex(inode, acl, ACL_TYPE_ACCESS, 1);
++
++ posix_acl_release(acl);
++out1:
++ posix_acl_release(default_acl);
++
++out:
++ return err;
++}
++#endif
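
A minimal usage sketch for ntfs_init_acl() from a create path, as the comment
above describes (the unwind label is hypothetical):

	err = ntfs_init_acl(inode, dir);
	if (err)
		goto out_free_inode;	/* assumed cleanup label in the caller */
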
++
++/*
+ * ntfs_acl_chmod
+ *
-+ * helper for 'ntfs_setattr'
++ * helper for 'ntfs3_setattr'
+ */
+int ntfs_acl_chmod(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
-+ int err;
+
+ if (!(sb->s_flags & SB_POSIXACL))
+ return 0;
@@ -2926,9 +3382,7 @@ index 000000000000..06eafeb0436f
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
-+ err = posix_acl_chmod(inode, inode->i_mode);
-+
-+ return err;
++ return posix_acl_chmod(inode, inode->i_mode);
+}
+
+/*
@@ -2938,18 +3392,12 @@ index 000000000000..06eafeb0436f
+ */
+int ntfs_permission(struct inode *inode, int mask)
+{
-+ struct super_block *sb = inode->i_sb;
-+ struct ntfs_sb_info *sbi = sb->s_fs_info;
-+ int err;
-+
-+ if (sbi->options.no_acs_rules) {
++ if (ntfs_sb(inode->i_sb)->options.no_acs_rules) {
+ /* "no access rules" mode - allow all changes */
+ return 0;
+ }
+
-+ err = generic_permission(inode, mask);
-+
-+ return err;
++ return generic_permission(inode, mask);
+}
+
+/*
@@ -3074,6 +3522,7 @@ index 000000000000..06eafeb0436f
+ goto out;
+ }
+
++#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
@@ -3086,9 +3535,11 @@ index 000000000000..06eafeb0436f
+ ACL_TYPE_ACCESS :
+ ACL_TYPE_DEFAULT,
+ buffer, size);
-+ } else {
-+ err = ntfs_getxattr_hlp(inode, name, buffer, size, NULL);
++ goto out;
+ }
++#endif
++ /* Deal with an NTFS extended attribute. */
++ err = ntfs_getxattr_hlp(inode, name, buffer, size, NULL);
+
+out:
+ return err;
@@ -3219,6 +3670,7 @@ index 000000000000..06eafeb0436f
+ goto out;
+ }
+
++#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
@@ -3231,66 +3683,11 @@ index 000000000000..06eafeb0436f
+ ACL_TYPE_ACCESS :
+ ACL_TYPE_DEFAULT,
+ value, size);
-+ } else {
-+ err = ntfs_set_ea(inode, name, value, size, flags, 0);
-+ }
-+
-+out:
-+ return err;
-+}
-+
-+/*
-+ * Initialize the ACLs of a new inode. Called from ntfs_create_inode.
-+ */
-+int ntfs_init_acl(struct inode *inode, struct inode *dir)
-+{
-+ struct posix_acl *default_acl, *acl;
-+ int err;
-+
-+ /*
-+ * TODO refactoring lock
-+ * ni_lock(dir) ... -> posix_acl_create(dir,...) -> ntfs_get_acl -> ni_lock(dir)
-+ */
-+ inode->i_default_acl = NULL;
-+
-+ default_acl = ntfs_get_acl_ex(dir, ACL_TYPE_DEFAULT, 1);
-+
-+ if (!default_acl || default_acl == ERR_PTR(-EOPNOTSUPP)) {
-+ inode->i_mode &= ~current_umask();
-+ err = 0;
-+ goto out;
-+ }
-+
-+ if (IS_ERR(default_acl)) {
-+ err = PTR_ERR(default_acl);
+ goto out;
+ }
-+
-+ acl = default_acl;
-+ err = __posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
-+ if (err < 0)
-+ goto out1;
-+ if (!err) {
-+ posix_acl_release(acl);
-+ acl = NULL;
-+ }
-+
-+ if (!S_ISDIR(inode->i_mode)) {
-+ posix_acl_release(default_acl);
-+ default_acl = NULL;
-+ }
-+
-+ if (default_acl)
-+ err = ntfs_set_acl_ex(inode, default_acl, ACL_TYPE_DEFAULT, 1);
-+
-+ if (!acl)
-+ inode->i_acl = NULL;
-+ else if (!err)
-+ err = ntfs_set_acl_ex(inode, acl, ACL_TYPE_ACCESS, 1);
-+
-+ posix_acl_release(acl);
-+out1:
-+ posix_acl_release(default_acl);
++#endif
++ /* Deal with an NTFS extended attribute. */
++ err = ntfs_set_ea(inode, name, value, size, flags, 0);
+
+out:
+ return err;
diff --git a/PATCH-v16-06-10-fs-ntfs3-Add-compression.patch b/PATCH-v16-06-10-fs-ntfs3-Add-compression.patch
new file mode 100644
index 0000000..0c9d94e
--- /dev/null
+++ b/PATCH-v16-06-10-fs-ntfs3-Add-compression.patch
@@ -0,0 +1,2131 @@
+From mboxrd@z Thu Jan 1 00:00:00 1970
+Return-Path: <linux-kernel-owner@kernel.org>
+X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
+ aws-us-west-2-korg-lkml-1.web.codeaurora.org
+X-Spam-Level:
+X-Spam-Status: No, score=-23.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
+ DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,
+ INCLUDES_PATCH,MAILING_LIST_MULTI,MENTIONS_GIT_HOSTING,SPF_HELO_NONE,SPF_PASS,
+ URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0
+Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
+ by smtp.lore.kernel.org (Postfix) with ESMTP id 77B48C433E6
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:55:12 +0000 (UTC)
+Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
+ by mail.kernel.org (Postfix) with ESMTP id 51B1123130
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:55:12 +0000 (UTC)
+Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
+ id S1729363AbgLYNyu (ORCPT
+ <rfc822;linux-kernel@archiver.kernel.org>);
+ Fri, 25 Dec 2020 08:54:50 -0500
+Received: from relayfre-01.paragon-software.com ([176.12.100.13]:47316 "EHLO
+ relayfre-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
+ by vger.kernel.org with ESMTP id S1726291AbgLYNyL (ORCPT
+ <rfc822;linux-kernel@vger.kernel.org>);
+ Fri, 25 Dec 2020 08:54:11 -0500
+Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
+ by relayfre-01.paragon-software.com (Postfix) with ESMTPS id 8EB711D71;
+ Fri, 25 Dec 2020 16:53:27 +0300 (MSK)
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
+ d=paragon-software.com; s=mail; t=1608904407;
+ bh=QVKUxaz4zmOd9xSGGms1ZNGnJjBJSjVXGoT9ebjYpyo=;
+ h=From:To:CC:Subject:Date:In-Reply-To:References;
+ b=Fs042elzVB0ZGPcuIkBFG0C95AkazF3CNA11bX2Ule5N3yM9hWVj3RTaqoXPsbJdc
+ tDdDxsXe9P1UE1dICCKGHEbGwFrPIP5+QYnU21Eu/bH6N3IqWB68YC8msDuU3QE08p
+ pWK0e+LfsROLhN2HJxqFvxUBDMuXedfyhpaWqcrE=
+Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
+ vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
+ (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:27 +0300
+From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+To: <linux-fsdevel@vger.kernel.org>
+CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
+ <pali@kernel.org>, <dsterba@suse.cz>, <aaptel@suse.com>,
+ <willy@infradead.org>, <rdunlap@infradead.org>, <joe@perches.com>,
+ <mark@harmstone.com>, <nborisov@suse.com>,
+ <linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
+ <dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
+ Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 06/10] fs/ntfs3: Add compression
+Date: Fri, 25 Dec 2020 16:51:15 +0300
+Message-ID: <20201225135119.3666763-7-almaz.alexandrovich@paragon-software.com>
+X-Mailer: git-send-email 2.25.4
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+MIME-Version: 1.0
+Content-Transfer-Encoding: 8bit
+Content-Type: text/plain
+X-Originating-IP: [172.30.114.105]
+X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
+ vdlg-exch-02.paragon-software.com (172.30.1.105)
+Precedence: bulk
+List-ID: <linux-kernel.vger.kernel.org>
+X-Mailing-List: linux-kernel@vger.kernel.org
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-7-almaz.alexandrovich@paragon-software.com/>
+List-Archive: <https://lore.kernel.org/lkml/>
+List-Post: <mailto:linux-kernel@vger.kernel.org>
+
+This patch adds the compression types used by NTFS:
+- lznt
+- lzx
+- xpress
+The latter two (lzx, xpress) implement the Windows Compact OS feature. They
+were taken from the ntfs-3g system compression plugin authored by Eric Biggers
+(https://github.com/ebiggers/ntfs-3g-system-compression) and were ported to
+ntfs3 and adapted to the Linux kernel environment.
+
+Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+---
+ fs/ntfs3/lib/decompress_common.c | 332 +++++++++++++++
+ fs/ntfs3/lib/decompress_common.h | 352 ++++++++++++++++
+ fs/ntfs3/lib/lib.h | 26 ++
+ fs/ntfs3/lib/lzx_decompress.c | 683 +++++++++++++++++++++++++++++++
+ fs/ntfs3/lib/xpress_decompress.c | 155 +++++++
+ fs/ntfs3/lznt.c | 452 ++++++++++++++++++++
+ 6 files changed, 2000 insertions(+)
+ create mode 100644 fs/ntfs3/lib/decompress_common.c
+ create mode 100644 fs/ntfs3/lib/decompress_common.h
+ create mode 100644 fs/ntfs3/lib/lib.h
+ create mode 100644 fs/ntfs3/lib/lzx_decompress.c
+ create mode 100644 fs/ntfs3/lib/xpress_decompress.c
+ create mode 100644 fs/ntfs3/lznt.c
+
+diff --git a/fs/ntfs3/lib/decompress_common.c b/fs/ntfs3/lib/decompress_common.c
+new file mode 100644
+index 000000000000..83c9e93aea77
+--- /dev/null
++++ b/fs/ntfs3/lib/decompress_common.c
+@@ -0,0 +1,332 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * decompress_common.c - Code shared by the XPRESS and LZX decompressors
++ *
++ * Copyright (C) 2015 Eric Biggers
++ *
++ * This program is free software: you can redistribute it and/or modify it under
++ * the terms of the GNU General Public License as published by the Free Software
++ * Foundation, either version 2 of the License, or (at your option) any later
++ * version.
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
++ * details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program. If not, see <http://www.gnu.org/licenses/>.
++ */
++
++#include "decompress_common.h"
++
++/*
++ * make_huffman_decode_table() -
++ *
++ * Build a decoding table for a canonical prefix code, or "Huffman code".
++ *
++ * This is an internal function, not part of the library API!
++ *
++ * This takes as input the length of the codeword for each symbol in the
++ * alphabet and produces as output a table that can be used for fast
++ * decoding of prefix-encoded symbols using read_huffsym().
++ *
++ * Strictly speaking, a canonical prefix code might not be a Huffman
++ * code. But this algorithm will work either way; and in fact, since
++ * Huffman codes are defined in terms of symbol frequencies, there is no
++ * way for the decompressor to know whether the code is a true Huffman
++ * code or not until all symbols have been decoded.
++ *
++ * Because the prefix code is assumed to be "canonical", it can be
++ * reconstructed directly from the codeword lengths. A prefix code is
++ * canonical if and only if a longer codeword never lexicographically
++ * precedes a shorter codeword, and the lexicographic ordering of
++ * codewords of the same length is the same as the lexicographic ordering
++ * of the corresponding symbols. Consequently, we can sort the symbols
++ * primarily by codeword length and secondarily by symbol value, then
++ * reconstruct the prefix code by generating codewords lexicographically
++ * in that order.
++ *
++ * This function does not, however, generate the prefix code explicitly.
++ * Instead, it directly builds a table for decoding symbols using the
++ * code. The basic idea is this: given the next 'max_codeword_len' bits
++ * in the input, we can look up the decoded symbol by indexing a table
++ * containing 2**max_codeword_len entries. A codeword with length
++ * 'max_codeword_len' will have exactly one entry in this table, whereas
++ * a codeword shorter than 'max_codeword_len' will have multiple entries
++ * in this table. Precisely, a codeword of length n will be represented
++ * by 2**(max_codeword_len - n) entries in this table. The 0-based index
++ * of each such entry will contain the corresponding codeword as a prefix
++ * when zero-padded on the left to 'max_codeword_len' binary digits.
++ *
++ * That's the basic idea, but we implement two optimizations regarding
++ * the format of the decode table itself:
++ *
++ * - For many compression formats, the maximum codeword length is too
++ * long for it to be efficient to build the full decoding table
++ * whenever a new prefix code is used. Instead, we can build the table
++ * using only 2**table_bits entries, where 'table_bits' is some number
++ * less than or equal to 'max_codeword_len'. Then, only codewords of
++ * length 'table_bits' and shorter can be directly looked up. For
++ * longer codewords, the direct lookup instead produces the root of a
++ * binary tree. Using this tree, the decoder can do traditional
++ * bit-by-bit decoding of the remainder of the codeword. Child nodes
++ * are allocated in extra entries at the end of the table; leaf nodes
++ * contain symbols. Note that the long-codeword case is, in general,
++ * not performance critical, since in Huffman codes the most frequently
++ * used symbols are assigned the shortest codeword lengths.
++ *
++ * - When we decode a symbol using a direct lookup of the table, we still
++ * need to know its length so that the bitstream can be advanced by the
++ * appropriate number of bits. The simple solution is to retain the
++ * 'lens' array and use the decoded symbol as an index into it.
++ * However, this requires two separate array accesses in the fast path.
++ * The optimization is to store the length directly in the decode
++ * table. We use the bottom 11 bits for the symbol and the top 5 bits
++ * for the length. In addition, to combine this optimization with the
++ * previous one, we introduce a special case where the top 2 bits of
++ * the length are both set if the entry is actually the root of a
++ * binary tree.
++ *
++ * @decode_table:
++ * The array in which to create the decoding table. This must have
++ * a length of at least ((2**table_bits) + 2 * num_syms) entries.
++ *
++ * @num_syms:
++ * The number of symbols in the alphabet; also, the length of the
++ * 'lens' array. Must be less than or equal to 2048.
++ *
++ * @table_bits:
++ * The order of the decode table size, as explained above. Must be
++ * less than or equal to 13.
++ *
++ * @lens:
++ * An array of length @num_syms, indexable by symbol, that gives the
++ * length of the codeword, in bits, for that symbol. The length can
++ * be 0, which means that the symbol does not have a codeword
++ * assigned.
++ *
++ * @max_codeword_len:
++ * The longest codeword length allowed in the compression format.
++ * All entries in 'lens' must be less than or equal to this value.
++ * This must be less than or equal to 23.
++ *
++ * @working_space
++ * A temporary array of length '2 * (max_codeword_len + 1) +
++ * num_syms'.
++ *
++ * Returns 0 on success, or -1 if the lengths do not form a valid prefix
++ * code.
++ */
++int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
++ const u32 table_bits, const u8 lens[],
++ const u32 max_codeword_len,
++ u16 working_space[])
++{
++ const u32 table_num_entries = 1 << table_bits;
++ u16 * const len_counts = &working_space[0];
++ u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
++ u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
++ int left;
++ void *decode_table_ptr;
++ u32 sym_idx;
++ u32 codeword_len;
++ u32 stores_per_loop;
++ u32 decode_table_pos;
++ u32 len;
++ u32 sym;
++
++ /* Count how many symbols have each possible codeword length.
++ * Note that a length of 0 indicates the corresponding symbol is not
++ * used in the code and therefore does not have a codeword.
++ */
++ for (len = 0; len <= max_codeword_len; len++)
++ len_counts[len] = 0;
++ for (sym = 0; sym < num_syms; sym++)
++ len_counts[lens[sym]]++;
++
++ /* We can assume all lengths are <= max_codeword_len, but we
++ * cannot assume they form a valid prefix code. A codeword of
++ * length n should require a proportion of the codespace equaling
++ * (1/2)^n. The code is valid if and only if the codespace is
++ * exactly filled by the lengths, by this measure.
++ */
++ left = 1;
++ for (len = 1; len <= max_codeword_len; len++) {
++ left <<= 1;
++ left -= len_counts[len];
++ if (left < 0) {
++ /* The lengths overflow the codespace; that is, the code
++ * is over-subscribed.
++ */
++ return -1;
++ }
++ }
++
++ if (left) {
++ /* The lengths do not fill the codespace; that is, they form an
++ * incomplete set.
++ */
++ if (left == (1 << max_codeword_len)) {
++ /* The code is completely empty. This is arguably
++ * invalid, but in fact it is valid in LZX and XPRESS,
++ * so we must allow it. By definition, no symbols can
++ * be decoded with an empty code. Consequently, we
++ * technically don't even need to fill in the decode
++ * table. However, to avoid accessing uninitialized
++ * memory if the algorithm nevertheless attempts to
++ * decode symbols using such a code, we zero out the
++ * decode table.
++ */
++ memset(decode_table, 0,
++ table_num_entries * sizeof(decode_table[0]));
++ return 0;
++ }
++ return -1;
++ }
++
++ /* Sort the symbols primarily by length and secondarily by symbol order.
++ */
++
++ /* Initialize 'offsets' so that offsets[len] for 1 <= len <=
++ * max_codeword_len is the number of codewords shorter than 'len' bits.
++ */
++ offsets[1] = 0;
++ for (len = 1; len < max_codeword_len; len++)
++ offsets[len + 1] = offsets[len] + len_counts[len];
++
++ /* Use the 'offsets' array to sort the symbols. Note that we do not
++ * include symbols that are not used in the code. Consequently, fewer
++ * than 'num_syms' entries in 'sorted_syms' may be filled.
++ */
++ for (sym = 0; sym < num_syms; sym++)
++ if (lens[sym])
++ sorted_syms[offsets[lens[sym]]++] = sym;
++
++ /* Fill entries for codewords with length <= table_bits
++ * --- that is, those short enough for a direct mapping.
++ *
++ * The table will start with entries for the shortest codeword(s), which
++ * have the most entries. From there, the number of entries per
++ * codeword will decrease.
++ */
++ decode_table_ptr = decode_table;
++ sym_idx = 0;
++ codeword_len = 1;
++ stores_per_loop = (1 << (table_bits - codeword_len));
++ for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
++ u32 end_sym_idx = sym_idx + len_counts[codeword_len];
++
++ for (; sym_idx < end_sym_idx; sym_idx++) {
++ u16 entry;
++ u16 *p;
++ u32 n;
++
++ entry = ((u32)codeword_len << 11) | sorted_syms[sym_idx];
++ p = (u16 *)decode_table_ptr;
++ n = stores_per_loop;
++
++ do {
++ *p++ = entry;
++ } while (--n);
++
++ decode_table_ptr = p;
++ }
++ }
++
++ /* If we've filled in the entire table, we are done. Otherwise,
++ * there are codewords longer than table_bits for which we must
++ * generate binary trees.
++ */
++ decode_table_pos = (u16 *)decode_table_ptr - decode_table;
++ if (decode_table_pos != table_num_entries) {
++ u32 j;
++ u32 next_free_tree_slot;
++ u32 cur_codeword;
++
++ /* First, zero out the remaining entries. This is
++ * necessary so that these entries appear as
++ * "unallocated" in the next part. Each of these entries
++ * will eventually be filled with the representation of
++ * the root node of a binary tree.
++ */
++ j = decode_table_pos;
++ do {
++ decode_table[j] = 0;
++ } while (++j != table_num_entries);
++
++ /* We allocate child nodes starting at the end of the
++ * direct lookup table. Note that there should be
++ * 2*num_syms extra entries for this purpose, although
++ * fewer than this may actually be needed.
++ */
++ next_free_tree_slot = table_num_entries;
++
++ /* Iterate through each codeword with length greater than
++ * 'table_bits', primarily in order of codeword length
++ * and secondarily in order of symbol.
++ */
++ for (cur_codeword = decode_table_pos << 1;
++ codeword_len <= max_codeword_len;
++ codeword_len++, cur_codeword <<= 1) {
++ u32 end_sym_idx = sym_idx + len_counts[codeword_len];
++
++ for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) {
++ /* 'sorted_sym' is the symbol represented by the
++ * codeword.
++ */
++ u32 sorted_sym = sorted_syms[sym_idx];
++ u32 extra_bits = codeword_len - table_bits;
++ u32 node_idx = cur_codeword >> extra_bits;
++
++ /* Go through each bit of the current codeword
++ * beyond the prefix of length @table_bits and
++ * walk the appropriate binary tree, allocating
++ * any slots that have not yet been allocated.
++ *
++ * Note that the 'pointer' entry to the binary
++ * tree, which is stored in the direct lookup
++ * portion of the table, is represented
++ * identically to other internal (non-leaf)
++ * nodes of the binary tree; it can be thought
++ * of as simply the root of the tree. The
++ * representation of these internal nodes is
++ * simply the index of the left child combined
++ * with the special bits 0xC000 to distinguish
++ * the entry from direct mapping and leaf node
++ * entries.
++ */
++ do {
++ /* At least one bit remains in the
++ * codeword, but the current node is an
++ * unallocated leaf. Change it to an
++ * internal node.
++ */
++ if (decode_table[node_idx] == 0) {
++ decode_table[node_idx] =
++ next_free_tree_slot | 0xC000;
++ decode_table[next_free_tree_slot++] = 0;
++ decode_table[next_free_tree_slot++] = 0;
++ }
++
++ /* Go to the left child if the next bit
++ * in the codeword is 0; otherwise go to
++ * the right child.
++ */
++ node_idx = decode_table[node_idx] & 0x3FFF;
++ --extra_bits;
++ node_idx += (cur_codeword >> extra_bits) & 1;
++ } while (extra_bits != 0);
++
++ /* We've traversed the tree using the entire
++ * codeword, and we're now at the entry where
++ * the actual symbol will be stored. This is
++ * distinguished from internal nodes by not
++ * having its high two bits set.
++ */
++ decode_table[node_idx] = sorted_sym;
++ }
++ }
++ }
++ return 0;
++}
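
A worked example of the canonical construction described above (values made
up): for symbols {A, B, C, D} with lens = {1, 3, 2, 3}, sorting primarily by
length and secondarily by symbol gives A, C, B, D, and generating codewords
lexicographically in that order yields A=0, C=10, B=110, D=111. The codespace
check passes exactly, since 1/2 + 1/4 + 1/8 + 1/8 = 1, so the code is complete.
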
+diff --git a/fs/ntfs3/lib/decompress_common.h b/fs/ntfs3/lib/decompress_common.h
+new file mode 100644
+index 000000000000..66297f398403
+--- /dev/null
++++ b/fs/ntfs3/lib/decompress_common.h
+@@ -0,0 +1,352 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++/*
++ * decompress_common.h - Code shared by the XPRESS and LZX decompressors
++ *
++ * Copyright (C) 2015 Eric Biggers
++ *
++ * This program is free software: you can redistribute it and/or modify it under
++ * the terms of the GNU General Public License as published by the Free Software
++ * Foundation, either version 2 of the License, or (at your option) any later
++ * version.
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
++ * details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program. If not, see <http://www.gnu.org/licenses/>.
++ */
++
++#include <linux/string.h>
++#include <linux/compiler.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <asm/unaligned.h>
++
++
++/* "Force inline" macro (not required, but helpful for performance) */
++#define forceinline __always_inline
++
++/* Enable whole-word match copying on selected architectures */
++#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
++# define FAST_UNALIGNED_ACCESS
++#endif
++
++/* Size of a machine word */
++#define WORDBYTES (sizeof(size_t))
++
++static forceinline void
++copy_unaligned_word(const void *src, void *dst)
++{
++ put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
++}
++
++
++/* Generate a "word" with platform-dependent size whose bytes all contain the
++ * value 'b'.
++ */
++static forceinline size_t repeat_byte(u8 b)
++{
++ size_t v;
++
++ v = b;
++ v |= v << 8;
++ v |= v << 16;
++ v |= v << ((WORDBYTES == 8) ? 32 : 0);
++ return v;
++}
++
++/* Structure that encapsulates a block of in-memory data being interpreted as a
++ * stream of bits, optionally with interwoven literal bytes. Bits are assumed
++ * to be stored in little endian 16-bit coding units, with the bits ordered high
++ * to low.
++ */
++struct input_bitstream {
++
++ /* Bits that have been read from the input buffer. The bits are
++ * left-justified; the next bit is always bit 31.
++ */
++ u32 bitbuf;
++
++ /* Number of bits currently held in @bitbuf. */
++ u32 bitsleft;
++
++ /* Pointer to the next byte to be retrieved from the input buffer. */
++ const u8 *next;
++
++ /* Pointer to just past the end of the input buffer. */
++ const u8 *end;
++};
++
++/* Initialize a bitstream to read from the specified input buffer. */
++static forceinline void init_input_bitstream(struct input_bitstream *is,
++ const void *buffer, u32 size)
++{
++ is->bitbuf = 0;
++ is->bitsleft = 0;
++ is->next = buffer;
++ is->end = is->next + size;
++}
++
++/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
++ * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
++ * may be called on the bitstream to peek or remove up to @num_bits bits. Note
++ * that @num_bits must be <= 16.
++ */
++static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
++ u32 num_bits)
++{
++ if (is->bitsleft < num_bits) {
++ if (is->end - is->next >= 2) {
++ is->bitbuf |= (u32)get_unaligned_le16(is->next)
++ << (16 - is->bitsleft);
++ is->next += 2;
++ }
++ is->bitsleft += 16;
++ }
++}
++
++/* Return the next @num_bits bits from the bitstream, without removing them.
++ * There must be at least @num_bits remaining in the buffer variable, from a
++ * previous call to bitstream_ensure_bits().
++ */
++static forceinline u32
++bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
++{
++ return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
++}
++
++/* Remove @num_bits from the bitstream. There must be at least @num_bits
++ * remaining in the buffer variable, from a previous call to
++ * bitstream_ensure_bits().
++ */
++static forceinline void
++bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
++{
++ is->bitbuf <<= num_bits;
++ is->bitsleft -= num_bits;
++}
++
++/* Remove and return @num_bits bits from the bitstream. There must be at least
++ * @num_bits remaining in the buffer variable, from a previous call to
++ * bitstream_ensure_bits().
++ */
++static forceinline u32
++bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
++{
++ u32 bits = bitstream_peek_bits(is, num_bits);
++
++ bitstream_remove_bits(is, num_bits);
++ return bits;
++}
++
++/* Read and return the next @num_bits bits from the bitstream. */
++static forceinline u32
++bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
++{
++ bitstream_ensure_bits(is, num_bits);
++ return bitstream_pop_bits(is, num_bits);
++}
++
++/* Read and return the next literal byte embedded in the bitstream. */
++static forceinline u8
++bitstream_read_byte(struct input_bitstream *is)
++{
++ if (unlikely(is->end == is->next))
++ return 0;
++ return *is->next++;
++}
++
++/* Read and return the next 16-bit integer embedded in the bitstream. */
++static forceinline u16
++bitstream_read_u16(struct input_bitstream *is)
++{
++ u16 v;
++
++ if (unlikely(is->end - is->next < 2))
++ return 0;
++ v = get_unaligned_le16(is->next);
++ is->next += 2;
++ return v;
++}
++
++/* Read and return the next 32-bit integer embedded in the bitstream. */
++static forceinline u32
++bitstream_read_u32(struct input_bitstream *is)
++{
++ u32 v;
++
++ if (unlikely(is->end - is->next < 4))
++ return 0;
++ v = get_unaligned_le32(is->next);
++ is->next += 4;
++ return v;
++}
++
++/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
++ * Return a pointer to the byte past the last one written, or NULL if the
++ * read would overrun the input buffer.
++ */
++static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
++ void *dst_buffer, size_t count)
++{
++ if ((size_t)(is->end - is->next) < count)
++ return NULL;
++ memcpy(dst_buffer, is->next, count);
++ is->next += count;
++ return (u8 *)dst_buffer + count;
++}
++
++/* Align the input bitstream on a coding-unit boundary. */
++static forceinline void bitstream_align(struct input_bitstream *is)
++{
++ is->bitsleft = 0;
++ is->bitbuf = 0;
++}
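
A minimal sketch of driving the bitstream API above (illustrative; 'buf' and
'size' are assumed to describe a compressed input buffer):

	struct input_bitstream is;
	u32 block_type;

	init_input_bitstream(&is, buf, size);
	bitstream_ensure_bits(&is, 4);			/* buffer at least 4 bits */
	block_type = bitstream_pop_bits(&is, 3);	/* e.g. an LZX block type */
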
++
++extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
++ const u32 table_bits, const u8 lens[],
++ const u32 max_codeword_len,
++ u16 working_space[]);
++
++
++/* Reads and returns the next Huffman-encoded symbol from a bitstream. If the
++ * input data is exhausted, the Huffman symbol is decoded as if the missing bits
++ * are all zeroes.
++ */
++static forceinline u32 read_huffsym(struct input_bitstream *istream,
++ const u16 decode_table[],
++ u32 table_bits,
++ u32 max_codeword_len)
++{
++ u32 entry;
++ u32 key_bits;
++
++ bitstream_ensure_bits(istream, max_codeword_len);
++
++ /* Index the decode table by the next table_bits bits of the input. */
++ key_bits = bitstream_peek_bits(istream, table_bits);
++ entry = decode_table[key_bits];
++ if (entry < 0xC000) {
++ /* Fast case: The decode table directly provided the
++ * symbol and codeword length. The low 11 bits are the
++ * symbol, and the high 5 bits are the codeword length.
++ */
++ bitstream_remove_bits(istream, entry >> 11);
++ return entry & 0x7FF;
++ }
++ /* Slow case: The codeword for the symbol is longer than
++ * table_bits, so the symbol does not have an entry
++ * directly in the first (1 << table_bits) entries of the
++ * decode table. Traverse the appropriate binary tree
++ * bit-by-bit to decode the symbol.
++ */
++ bitstream_remove_bits(istream, table_bits);
++ do {
++ key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
++ } while ((entry = decode_table[key_bits]) >= 0xC000);
++ return entry;
++}
++
++/*
++ * Copy an LZ77 match at (dst - offset) to dst.
++ *
++ * The length and offset must already have been validated --- that is, (dst - offset)
++ * can't underrun the output buffer, and (dst + length) can't overrun the output
++ * buffer. Also, the length cannot be 0.
++ *
++ * @bufend points to the byte past the end of the output buffer. This function
++ * won't write any data beyond this position.
++ *
++ * Returns dst + length.
++ */
++static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
++ u32 min_length)
++{
++ const u8 *src = dst - offset;
++
++ /*
++ * Try to copy one machine word at a time. On i386 and x86_64 this is
++ * faster than copying one byte at a time, unless the data is
++ * near-random and all the matches have very short lengths. Note that
++ * since this requires unaligned memory accesses, it won't necessarily
++ * be faster on every architecture.
++ *
++ * Also note that we might copy more than the length of the match. For
++ * example, if a word is 8 bytes and the match is of length 5, then
++ * we'll simply copy 8 bytes. This is okay as long as we don't write
++ * beyond the end of the output buffer, hence the check for (bufend -
++ * end >= WORDBYTES - 1).
++ */
++#ifdef FAST_UNALIGNED_ACCESS
++ u8 * const end = dst + length;
++
++ if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {
++
++ if (offset >= WORDBYTES) {
++ /* The source and destination words don't overlap. */
++
++ /* To improve branch prediction, one iteration of this
++ * loop is unrolled. Most matches are short and will
++ * fail the first check. But if that check passes, then
++ * it becomes increasingly likely that the match is long
++ * and we'll need to continue copying.
++ */
++
++ copy_unaligned_word(src, dst);
++ src += WORDBYTES;
++ dst += WORDBYTES;
++
++ if (dst < end) {
++ do {
++ copy_unaligned_word(src, dst);
++ src += WORDBYTES;
++ dst += WORDBYTES;
++ } while (dst < end);
++ }
++ return end;
++ } else if (offset == 1) {
++
++ /* Offset 1 matches are equivalent to run-length
++ * encoding of the previous byte. This case is common
++ * if the data contains many repeated bytes.
++ */
++ size_t v = repeat_byte(*(dst - 1));
++
++ do {
++ put_unaligned(v, (size_t *)dst);
++ src += WORDBYTES;
++ dst += WORDBYTES;
++ } while (dst < end);
++ return end;
++ }
++ /*
++ * We don't bother with special cases for other 'offset <
++ * WORDBYTES', which are usually rarer than 'offset == 1'. Extra
++ * checks will just slow things down. Actually, it's possible
++ * to handle all the 'offset < WORDBYTES' cases using the same
++ * code, but it becomes more complicated and doesn't seem any
++ * faster overall; it definitely slows down the more common
++ * 'offset == 1' case.
++ */
++ }
++#endif /* FAST_UNALIGNED_ACCESS */
++
++ /* Fall back to a bytewise copy. */
++
++ if (min_length >= 2) {
++ *dst++ = *src++;
++ length--;
++ }
++ if (min_length >= 3) {
++ *dst++ = *src++;
++ length--;
++ }
++ do {
++ *dst++ = *src++;
++ } while (--length);
++
++ return dst;
++}
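
An illustrative lz_copy() call (the variables are assumed to come from a
decode loop; 2 is the minimum match length of a format such as LZX):

	/* Copy a validated match of length 5 at distance 1 -- effectively
	 * run-length extension of the previous byte.
	 */
	out_next = lz_copy(out_next, 5, 1, block_end, 2);
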
+diff --git a/fs/ntfs3/lib/lib.h b/fs/ntfs3/lib/lib.h
+new file mode 100644
+index 000000000000..f508fbad2e71
+--- /dev/null
++++ b/fs/ntfs3/lib/lib.h
+@@ -0,0 +1,26 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++/*
++ * Adapted for linux kernel by Alexander Mamaev:
++ * - remove implementations of get_unaligned_
++ * - assume GCC is always defined
++ * - ISO C90
++ * - linux kernel code style
++ */
++
++
++/* globals from xpress_decompress.c */
++struct xpress_decompressor *xpress_allocate_decompressor(void);
++void xpress_free_decompressor(struct xpress_decompressor *d);
++int xpress_decompress(struct xpress_decompressor *__restrict d,
++ const void *__restrict compressed_data,
++ size_t compressed_size,
++ void *__restrict uncompressed_data,
++ size_t uncompressed_size);
++
++/* globals from lzx_decompress.c */
++struct lzx_decompressor *lzx_allocate_decompressor(void);
++void lzx_free_decompressor(struct lzx_decompressor *d);
++int lzx_decompress(struct lzx_decompressor *__restrict d,
++ const void *__restrict compressed_data,
++ size_t compressed_size, void *__restrict uncompressed_data,
++ size_t uncompressed_size);
+diff --git a/fs/ntfs3/lib/lzx_decompress.c b/fs/ntfs3/lib/lzx_decompress.c
+new file mode 100644
+index 000000000000..77a381a693d1
+--- /dev/null
++++ b/fs/ntfs3/lib/lzx_decompress.c
+@@ -0,0 +1,683 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * lzx_decompress.c - A decompressor for the LZX compression format, which can
++ * be used in "System Compressed" files. This is based on the code from wimlib.
++ * This code only supports a window size (dictionary size) of 32768 bytes, since
++ * this is the only size used in System Compression.
++ *
++ * Copyright (C) 2015 Eric Biggers
++ *
++ * This program is free software: you can redistribute it and/or modify it under
++ * the terms of the GNU General Public License as published by the Free Software
++ * Foundation, either version 2 of the License, or (at your option) any later
++ * version.
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
++ * details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program. If not, see <http://www.gnu.org/licenses/>.
++ */
++
++#include "decompress_common.h"
++#include "lib.h"
++
++/* Number of literal byte values */
++#define LZX_NUM_CHARS 256
++
++/* The smallest and largest allowed match lengths */
++#define LZX_MIN_MATCH_LEN 2
++#define LZX_MAX_MATCH_LEN 257
++
++/* Number of distinct match lengths that can be represented */
++#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
++
++/* Number of match lengths for which no length symbol is required */
++#define LZX_NUM_PRIMARY_LENS 7
++#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)
++
++/* Valid values of the 3-bit block type field */
++#define LZX_BLOCKTYPE_VERBATIM 1
++#define LZX_BLOCKTYPE_ALIGNED 2
++#define LZX_BLOCKTYPE_UNCOMPRESSED 3
++
++/* Number of offset slots for a window size of 32768 */
++#define LZX_NUM_OFFSET_SLOTS 30
++
++/* Number of symbols in the main code for a window size of 32768 */
++#define LZX_MAINCODE_NUM_SYMBOLS \
++ (LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
++
++/* Number of symbols in the length code */
++#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
++
++/* Number of symbols in the precode */
++#define LZX_PRECODE_NUM_SYMBOLS 20
++
++/* Number of bits in which each precode codeword length is represented */
++#define LZX_PRECODE_ELEMENT_SIZE 4
++
++/* Number of low-order bits of each match offset that are entropy-encoded in
++ * aligned offset blocks
++ */
++#define LZX_NUM_ALIGNED_OFFSET_BITS 3
++
++/* Number of symbols in the aligned offset code */
++#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)
++
++/* Mask for the match offset bits that are entropy-encoded in aligned offset
++ * blocks
++ */
++#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
++
++/* Number of bits in which each aligned offset codeword length is represented */
++#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3
++
++/* Maximum lengths (in bits) of the codewords in each Huffman code */
++#define LZX_MAX_MAIN_CODEWORD_LEN 16
++#define LZX_MAX_LEN_CODEWORD_LEN 16
++#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
++#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
++
++/* The default "filesize" value used in pre/post-processing. In the LZX format
++ * used in cabinet files this value must be given to the decompressor, whereas
++ * in the LZX format used in WIM files and system-compressed files this value is
++ * fixed at 12000000.
++ */
++#define LZX_DEFAULT_FILESIZE 12000000
++
++/* Assumed block size when the encoded block size begins with a 1 bit. */
++#define LZX_DEFAULT_BLOCK_SIZE 32768
++
++/* Number of offsets in the recent (or "repeat") offsets queue. */
++#define LZX_NUM_RECENT_OFFSETS 3
++
++/* These values are chosen for fast decompression. */
++#define LZX_MAINCODE_TABLEBITS 11
++#define LZX_LENCODE_TABLEBITS 10
++#define LZX_PRECODE_TABLEBITS 6
++#define LZX_ALIGNEDCODE_TABLEBITS 7
++
++#define LZX_READ_LENS_MAX_OVERRUN 50
++
++/* Mapping: offset slot => first match offset that uses that offset slot.
++ */
++static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = {
++ 0, 1, 2, 3, 4, /* 0 --- 4 */
++ 6, 8, 12, 16, 24, /* 5 --- 9 */
++ 32, 48, 64, 96, 128, /* 10 --- 14 */
++ 192, 256, 384, 512, 768, /* 15 --- 19 */
++ 1024, 1536, 2048, 3072, 4096, /* 20 --- 24 */
++ 6144, 8192, 12288, 16384, 24576, /* 25 --- 29 */
++ 32768, /* extra */
++};
++
++/* Mapping: offset slot => how many extra bits must be read and added to the
++ * corresponding offset slot base to decode the match offset.
++ */
++static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = {
++ 0, 0, 0, 0, 1,
++ 1, 2, 2, 3, 3,
++ 4, 4, 5, 5, 6,
++ 6, 7, 7, 8, 8,
++ 9, 9, 10, 10, 11,
++ 11, 12, 12, 13, 13,
++};
++
++/* Reusable heap-allocated memory for LZX decompression */
++struct lzx_decompressor {
++
++ /* Huffman decoding tables, and arrays that map symbols to codeword
++ * lengths
++ */
++
++ u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
++ (LZX_MAINCODE_NUM_SYMBOLS * 2)];
++ u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
++
++
++ u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
++ (LZX_LENCODE_NUM_SYMBOLS * 2)];
++ u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
++
++
++ u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
++ (LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)];
++ u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
++
++ u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
++ (LZX_PRECODE_NUM_SYMBOLS * 2)];
++ u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
++
++ /* Temporary space for make_huffman_decode_table() */
++ u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) +
++ LZX_MAINCODE_NUM_SYMBOLS];
++};
++
++static void undo_e8_translation(void *target, s32 input_pos)
++{
++ s32 abs_offset, rel_offset;
++
++ abs_offset = get_unaligned_le32(target);
++ if (abs_offset >= 0) {
++ if (abs_offset < LZX_DEFAULT_FILESIZE) {
++ /* "good translation" */
++ rel_offset = abs_offset - input_pos;
++ put_unaligned_le32(rel_offset, target);
++ }
++ } else {
++ if (abs_offset >= -input_pos) {
++ /* "compensating translation" */
++ rel_offset = abs_offset + LZX_DEFAULT_FILESIZE;
++ put_unaligned_le32(rel_offset, target);
++ }
++ }
++}
++
++/*
++ * Undo the 'E8' preprocessing used in LZX. Before compression, the
++ * uncompressed data was preprocessed by changing the targets of suspected x86
++ * CALL instructions from relative offsets to absolute offsets. After
++ * match/literal decoding, the decompressor must undo the translation.
++ */
++static void lzx_postprocess(u8 *data, u32 size)
++{
++ /*
++ * A worthwhile optimization is to push the end-of-buffer check into the
++ * relatively rare E8 case. This is possible if we replace the last six
++ * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
++ * before reaching end-of-buffer. In addition, this scheme guarantees
++ * that no translation can begin following an E8 byte in the last 10
++ * bytes because a 4-byte offset containing E8 as its high byte is a
++ * large negative number that is not valid for translation. That is
++ * exactly what we need.
++ */
++ u8 *tail;
++ u8 saved_bytes[6];
++ u8 *p;
++
++ if (size <= 10)
++ return;
++
++ tail = &data[size - 6];
++ memcpy(saved_bytes, tail, 6);
++ memset(tail, 0xE8, 6);
++ p = data;
++ for (;;) {
++ while (*p != 0xE8)
++ p++;
++ if (p >= tail)
++ break;
++ undo_e8_translation(p + 1, p - data);
++ p += 5;
++ }
++ memcpy(tail, saved_bytes, 6);
++}
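
A worked example of the translation being undone (values made up): if an E8
byte sits at input position 100 and the following little-endian value is 1000,
then 0 <= 1000 < LZX_DEFAULT_FILESIZE selects the "good translation" branch
and the target is rewritten to 1000 - 100 = 900, the original relative offset.
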
++
++/* Read a Huffman-encoded symbol using the precode. */
++static forceinline u32 read_presym(const struct lzx_decompressor *d,
++ struct input_bitstream *is)
++{
++ return read_huffsym(is, d->precode_decode_table,
++ LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
++}
++
++/* Read a Huffman-encoded symbol using the main code. */
++static forceinline u32 read_mainsym(const struct lzx_decompressor *d,
++ struct input_bitstream *is)
++{
++ return read_huffsym(is, d->maincode_decode_table,
++ LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
++}
++
++/* Read a Huffman-encoded symbol using the length code. */
++static forceinline u32 read_lensym(const struct lzx_decompressor *d,
++ struct input_bitstream *is)
++{
++ return read_huffsym(is, d->lencode_decode_table,
++ LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
++}
++
++/* Read a Huffman-encoded symbol using the aligned offset code. */
++static forceinline u32 read_alignedsym(const struct lzx_decompressor *d,
++ struct input_bitstream *is)
++{
++ return read_huffsym(is, d->alignedcode_decode_table,
++ LZX_ALIGNEDCODE_TABLEBITS,
++ LZX_MAX_ALIGNED_CODEWORD_LEN);
++}
++
++/*
++ * Read the precode from the compressed input bitstream, then use it to decode
++ * @num_lens codeword length values.
++ *
++ * @is: The input bitstream.
++ *
++ * @lens: An array that contains the length values from the previous time
++ * the codeword lengths for this Huffman code were read, or all 0's
++ * if this is the first time. This array must have at least
++ * (@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries.
++ *
++ * @num_lens: Number of length values to decode.
++ *
++ * Returns 0 on success, or -1 if the data was invalid.
++ */
++static int lzx_read_codeword_lens(struct lzx_decompressor *d,
++ struct input_bitstream *is,
++ u8 *lens, u32 num_lens)
++{
++ u8 *len_ptr = lens;
++ u8 *lens_end = lens + num_lens;
++ int i;
++
++ /* Read the lengths of the precode codewords. These are given
++ * explicitly.
++ */
++ for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
++ d->precode_lens[i] =
++ bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
++ }
++
++ /* Make the decoding table for the precode. */
++ if (make_huffman_decode_table(d->precode_decode_table,
++ LZX_PRECODE_NUM_SYMBOLS,
++ LZX_PRECODE_TABLEBITS,
++ d->precode_lens,
++ LZX_MAX_PRE_CODEWORD_LEN,
++ d->working_space))
++ return -1;
++
++ /* Decode the codeword lengths. */
++ do {
++ u32 presym;
++ u8 len;
++
++ /* Read the next precode symbol. */
++ presym = read_presym(d, is);
++ if (presym < 17) {
++ /* Difference from old length */
++ len = *len_ptr - presym;
++ if ((s8)len < 0)
++ len += 17;
++ *len_ptr++ = len;
++ } else {
++ /* Special RLE values */
++
++ u32 run_len;
++
++ if (presym == 17) {
++ /* Run of 0's */
++ run_len = 4 + bitstream_read_bits(is, 4);
++ len = 0;
++ } else if (presym == 18) {
++ /* Longer run of 0's */
++ run_len = 20 + bitstream_read_bits(is, 5);
++ len = 0;
++ } else {
++ /* Run of identical lengths */
++ run_len = 4 + bitstream_read_bits(is, 1);
++ presym = read_presym(d, is);
++ if (presym > 17)
++ return -1;
++ len = *len_ptr - presym;
++ if ((s8)len < 0)
++ len += 17;
++ }
++
++ do {
++ *len_ptr++ = len;
++ } while (--run_len);
++ /* Worst case overrun is when presym == 18,
++ * run_len == 20 + 31, and only 1 length was remaining.
++ * So LZX_READ_LENS_MAX_OVERRUN == 50.
++ *
++ * Overrun while reading the first half of maincode_lens
++ * can corrupt the previous values in the second half.
++ * This doesn't really matter because the resulting
++ * lengths will still be in range, and data that
++ * generates overruns is invalid anyway.
++ */
++ }
++ } while (len_ptr < lens_end);
++
++ return 0;
++}
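
Two worked examples of the precode handling above (values made up): presym 17
followed by the 4-bit value 2 emits a run of 4 + 2 = 6 zero lengths; presym 5
applied to a previous length of 3 gives 3 - 5 = -2, which wraps by +17 to a
new length of 15.
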
++
++/*
++ * Read the header of an LZX block and save the block type and (uncompressed)
++ * size in *block_type_ret and *block_size_ret, respectively.
++ *
++ * If the block is compressed, also update the Huffman decode @tables with the
++ * new Huffman codes. If the block is uncompressed, also update the match
++ * offset @queue with the new match offsets.
++ *
++ * Return 0 on success, or -1 if the data was invalid.
++ */
++static int lzx_read_block_header(struct lzx_decompressor *d,
++ struct input_bitstream *is,
++ int *block_type_ret,
++ u32 *block_size_ret,
++ u32 recent_offsets[])
++{
++ int block_type;
++ u32 block_size;
++ int i;
++
++ bitstream_ensure_bits(is, 4);
++
++ /* The first three bits tell us what kind of block it is, and should be
++ * one of the LZX_BLOCKTYPE_* values.
++ */
++ block_type = bitstream_pop_bits(is, 3);
++
++ /* Read the block size. */
++ if (bitstream_pop_bits(is, 1)) {
++ block_size = LZX_DEFAULT_BLOCK_SIZE;
++ } else {
++ block_size = 0;
++ block_size |= bitstream_read_bits(is, 8);
++ block_size <<= 8;
++ block_size |= bitstream_read_bits(is, 8);
++ }
++
++ switch (block_type) {
++
++ case LZX_BLOCKTYPE_ALIGNED:
++
++ /* Read the aligned offset code and prepare its decode table.
++ */
++
++ for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
++ d->alignedcode_lens[i] =
++ bitstream_read_bits(is,
++ LZX_ALIGNEDCODE_ELEMENT_SIZE);
++ }
++
++ if (make_huffman_decode_table(d->alignedcode_decode_table,
++ LZX_ALIGNEDCODE_NUM_SYMBOLS,
++ LZX_ALIGNEDCODE_TABLEBITS,
++ d->alignedcode_lens,
++ LZX_MAX_ALIGNED_CODEWORD_LEN,
++ d->working_space))
++ return -1;
++
++ /* Fall through, since the rest of the header for aligned offset
++ * blocks is the same as that for verbatim blocks.
++ */
++ fallthrough;
++
++ case LZX_BLOCKTYPE_VERBATIM:
++
++ /* Read the main code and prepare its decode table.
++ *
++ * Note that the codeword lengths in the main code are encoded
++ * in two parts: one part for literal symbols, and one part for
++ * match symbols.
++ */
++
++ if (lzx_read_codeword_lens(d, is, d->maincode_lens,
++ LZX_NUM_CHARS))
++ return -1;
++
++ if (lzx_read_codeword_lens(d, is,
++ d->maincode_lens + LZX_NUM_CHARS,
++ LZX_MAINCODE_NUM_SYMBOLS - LZX_NUM_CHARS))
++ return -1;
++
++ if (make_huffman_decode_table(d->maincode_decode_table,
++ LZX_MAINCODE_NUM_SYMBOLS,
++ LZX_MAINCODE_TABLEBITS,
++ d->maincode_lens,
++ LZX_MAX_MAIN_CODEWORD_LEN,
++ d->working_space))
++ return -1;
++
++ /* Read the length code and prepare its decode table. */
++
++ if (lzx_read_codeword_lens(d, is, d->lencode_lens,
++ LZX_LENCODE_NUM_SYMBOLS))
++ return -1;
++
++ if (make_huffman_decode_table(d->lencode_decode_table,
++ LZX_LENCODE_NUM_SYMBOLS,
++ LZX_LENCODE_TABLEBITS,
++ d->lencode_lens,
++ LZX_MAX_LEN_CODEWORD_LEN,
++ d->working_space))
++ return -1;
++
++ break;
++
++ case LZX_BLOCKTYPE_UNCOMPRESSED:
++
++ /* Before reading the three recent offsets from the uncompressed
++ * block header, the stream must be aligned on a 16-bit
++ * boundary. But if the stream is *already* aligned, then the
++ * next 16 bits must be discarded.
++ */
++ bitstream_ensure_bits(is, 1);
++ bitstream_align(is);
++
++ recent_offsets[0] = bitstream_read_u32(is);
++ recent_offsets[1] = bitstream_read_u32(is);
++ recent_offsets[2] = bitstream_read_u32(is);
++
++ /* Offsets of 0 are invalid. */
++ if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
++ recent_offsets[2] == 0)
++ return -1;
++ break;
++
++ default:
++ /* Unrecognized block type. */
++ return -1;
++ }
++
++ *block_type_ret = block_type;
++ *block_size_ret = block_size;
++ return 0;
++}
++
++/* Decompress a block of LZX-compressed data. */
++static int lzx_decompress_block(const struct lzx_decompressor *d,
++ struct input_bitstream *is,
++ int block_type, u32 block_size,
++ u8 * const out_begin, u8 *out_next,
++ u32 recent_offsets[])
++{
++ u8 * const block_end = out_next + block_size;
++ u32 ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
++
++ do {
++ u32 mainsym;
++ u32 match_len;
++ u32 match_offset;
++ u32 offset_slot;
++ u32 num_extra_bits;
++
++ mainsym = read_mainsym(d, is);
++ if (mainsym < LZX_NUM_CHARS) {
++ /* Literal */
++ *out_next++ = mainsym;
++ continue;
++ }
++
++ /* Match */
++
++ /* Decode the length header and offset slot. */
++ mainsym -= LZX_NUM_CHARS;
++ match_len = mainsym % LZX_NUM_LEN_HEADERS;
++ offset_slot = mainsym / LZX_NUM_LEN_HEADERS;
++
++ /* If needed, read a length symbol to decode the full length. */
++ if (match_len == LZX_NUM_PRIMARY_LENS)
++ match_len += read_lensym(d, is);
++ match_len += LZX_MIN_MATCH_LEN;
++
++ if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
++ /* Repeat offset */
++
++ /* Note: This isn't a real LRU queue, since using the R2
++ * offset doesn't bump the R1 offset down to R2. This
++ * quirk allows all 3 recent offsets to be handled by
++ * the same code. (For R0, the swap is a no-op.)
++ */
++ match_offset = recent_offsets[offset_slot];
++ recent_offsets[offset_slot] = recent_offsets[0];
++ recent_offsets[0] = match_offset;
++ } else {
++ /* Explicit offset */
++
++ /* Look up the number of extra bits that need to be read
++ * to decode offsets with this offset slot.
++ */
++ num_extra_bits = lzx_extra_offset_bits[offset_slot];
++
++ /* Start with the offset slot base value. */
++ match_offset = lzx_offset_slot_base[offset_slot];
++
++ /* In aligned offset blocks, the low-order 3 bits of
++ * each offset are encoded using the aligned offset
++ * code. Otherwise, all the extra bits are literal.
++ */
++
++ if ((num_extra_bits & ones_if_aligned) >= LZX_NUM_ALIGNED_OFFSET_BITS) {
++ match_offset +=
++ bitstream_read_bits(is, num_extra_bits -
++ LZX_NUM_ALIGNED_OFFSET_BITS)
++ << LZX_NUM_ALIGNED_OFFSET_BITS;
++ match_offset += read_alignedsym(d, is);
++ } else {
++ match_offset += bitstream_read_bits(is, num_extra_bits);
++ }
++
++ /* Adjust the offset. */
++ match_offset -= (LZX_NUM_RECENT_OFFSETS - 1);
++
++ /* Update the recent offsets. */
++ recent_offsets[2] = recent_offsets[1];
++ recent_offsets[1] = recent_offsets[0];
++ recent_offsets[0] = match_offset;
++ }
++
++ /* Validate the match, then copy it to the current position. */
++
++ if (match_len > (size_t)(block_end - out_next))
++ return -1;
++
++ if (match_offset > (size_t)(out_next - out_begin))
++ return -1;
++
++ out_next = lz_copy(out_next, match_len, match_offset,
++ block_end, LZX_MIN_MATCH_LEN);
++
++ } while (out_next != block_end);
++
++ return 0;
++}
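
A worked example of the explicit-offset path above (values made up): offset
slot 10 has base 32 and 4 extra bits, so a verbatim block decodes an offset of
32 + extra with extra in [0, 15]; after subtracting (LZX_NUM_RECENT_OFFSETS -
1) = 2, the final match offset lies in [30, 45].
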
++
++/*
++ * lzx_allocate_decompressor - Allocate an LZX decompressor
++ *
++ * Return a pointer to the decompressor on success, or NULL on allocation
++ * failure.
++ */
++struct lzx_decompressor *lzx_allocate_decompressor(void)
++{
++ return kmalloc(sizeof(struct lzx_decompressor), GFP_NOFS);
++}
++
++/*
++ * lzx_decompress - Decompress a buffer of LZX-compressed data
++ *
++ * @decompressor: A decompressor allocated with lzx_allocate_decompressor()
++ * @compressed_data: The buffer of data to decompress
++ * @compressed_size: Number of bytes of compressed data
++ * @uncompressed_data: The buffer in which to store the decompressed data
++ * @uncompressed_size: The number of bytes the data decompresses into
++ *
++ * Return 0 on success, or -1 if the compressed data is invalid.
++ */
++int lzx_decompress(struct lzx_decompressor *decompressor,
++ const void *compressed_data, size_t compressed_size,
++ void *uncompressed_data, size_t uncompressed_size)
++{
++ struct lzx_decompressor *d = decompressor;
++ u8 * const out_begin = uncompressed_data;
++ u8 *out_next = out_begin;
++ u8 * const out_end = out_begin + uncompressed_size;
++ struct input_bitstream is;
++ u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
++ int e8_status = 0;
++
++ init_input_bitstream(&is, compressed_data, compressed_size);
++
++ /* Codeword lengths begin as all 0's for delta encoding purposes. */
++ memset(d->maincode_lens, 0, LZX_MAINCODE_NUM_SYMBOLS);
++ memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
++
++ /* Decompress blocks until we have all the uncompressed data. */
++
++ while (out_next != out_end) {
++ int block_type;
++ u32 block_size;
++
++ if (lzx_read_block_header(d, &is, &block_type, &block_size,
++ recent_offsets))
++ goto invalid;
++
++ if (block_size < 1 || block_size > (size_t)(out_end - out_next))
++ goto invalid;
++
++ if (block_type != LZX_BLOCKTYPE_UNCOMPRESSED) {
++
++ /* Compressed block */
++
++ if (lzx_decompress_block(d,
++ &is,
++ block_type,
++ block_size,
++ out_begin,
++ out_next,
++ recent_offsets))
++ goto invalid;
++
++ e8_status |= d->maincode_lens[0xe8];
++ out_next += block_size;
++ } else {
++ /* Uncompressed block */
++
++ out_next = bitstream_read_bytes(&is, out_next,
++ block_size);
++ if (!out_next)
++ goto invalid;
++
++ if (block_size & 1)
++ bitstream_read_byte(&is);
++
++ e8_status = 1;
++ }
++ }
++
++ /* Postprocess the data unless it cannot possibly contain 0xe8 bytes. */
++ if (e8_status)
++ lzx_postprocess(uncompressed_data, uncompressed_size);
++
++ return 0;
++
++invalid:
++ return -1;
++}
++
++/*
++ * lzx_free_decompressor - Free an LZX decompressor
++ *
++ * @decompressor: A decompressor that was allocated with
++ * lzx_allocate_decompressor(), or NULL.
++ */
++void lzx_free_decompressor(struct lzx_decompressor *decompressor)
++{
++ kfree(decompressor);
++}
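
A minimal usage sketch of the three entry points above (illustrative; 'cdata',
'csize', 'udata' and 'usize' are assumed caller-provided buffers and sizes):

	struct lzx_decompressor *d = lzx_allocate_decompressor();
	int err;

	if (!d)
		return -ENOMEM;
	err = lzx_decompress(d, cdata, csize, udata, usize);
	lzx_free_decompressor(d);
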
+diff --git a/fs/ntfs3/lib/xpress_decompress.c b/fs/ntfs3/lib/xpress_decompress.c
+new file mode 100644
+index 000000000000..3d98f36a981e
+--- /dev/null
++++ b/fs/ntfs3/lib/xpress_decompress.c
+@@ -0,0 +1,155 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * xpress_decompress.c - A decompressor for the XPRESS compression format
++ * (Huffman variant), which can be used in "System Compressed" files. This is
++ * based on the code from wimlib.
++ *
++ * Copyright (C) 2015 Eric Biggers
++ *
++ * This program is free software: you can redistribute it and/or modify it under
++ * the terms of the GNU General Public License as published by the Free Software
++ * Foundation, either version 2 of the License, or (at your option) any later
++ * version.
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
++ * details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program. If not, see <http://www.gnu.org/licenses/>.
++ */
++
++#include "decompress_common.h"
++#include "lib.h"
++
++#define XPRESS_NUM_SYMBOLS 512
++#define XPRESS_MAX_CODEWORD_LEN 15
++#define XPRESS_MIN_MATCH_LEN 3
++
++/* This value is chosen for fast decompression. */
++#define XPRESS_TABLEBITS 12
++
++/* Reusable heap-allocated memory for XPRESS decompression */
++struct xpress_decompressor {
++
++ /* The Huffman decoding table */
++ u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
++
++ /* An array that maps symbols to codeword lengths */
++ u8 lens[XPRESS_NUM_SYMBOLS];
++
++ /* Temporary space for make_huffman_decode_table() */
++ u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) +
++ XPRESS_NUM_SYMBOLS];
++};
++
++/*
++ * xpress_allocate_decompressor - Allocate an XPRESS decompressor
++ *
++ * Return the pointer to the decompressor on success, or return NULL and set
++ * errno on failure.
++ */
++struct xpress_decompressor *xpress_allocate_decompressor(void)
++{
++ return kmalloc(sizeof(struct xpress_decompressor), GFP_NOFS);
++}
++
++/*
++ * xpress_decompress - Decompress a buffer of XPRESS-compressed data
++ *
++ * @decompressor: A decompressor that was allocated with
++ * xpress_allocate_decompressor()
++ * @compressed_data: The buffer of data to decompress
++ * @compressed_size: Number of bytes of compressed data
++ * @uncompressed_data: The buffer in which to store the decompressed data
++ * @uncompressed_size: The number of bytes the data decompresses into
++ *
++ * Return 0 on success, or return -1 and set errno on failure.
++ */
++int xpress_decompress(struct xpress_decompressor *decompressor,
++ const void *compressed_data, size_t compressed_size,
++ void *uncompressed_data, size_t uncompressed_size)
++{
++ struct xpress_decompressor *d = decompressor;
++ const u8 * const in_begin = compressed_data;
++ u8 * const out_begin = uncompressed_data;
++ u8 *out_next = out_begin;
++ u8 * const out_end = out_begin + uncompressed_size;
++ struct input_bitstream is;
++ u32 i;
++
++ /* Read the Huffman codeword lengths. */
++ if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
++ goto invalid;
++ for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
++ d->lens[i*2 + 0] = in_begin[i] & 0xF;
++ d->lens[i*2 + 1] = in_begin[i] >> 4;
++ }
++
++ /* Build a decoding table for the Huffman code. */
++ if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
++ XPRESS_TABLEBITS, d->lens,
++ XPRESS_MAX_CODEWORD_LEN,
++ d->working_space))
++ goto invalid;
++
++ /* Decode the matches and literals. */
++
++ init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
++ compressed_size - XPRESS_NUM_SYMBOLS / 2);
++
++ while (out_next != out_end) {
++ u32 sym;
++ u32 log2_offset;
++ u32 length;
++ u32 offset;
++
++ sym = read_huffsym(&is, d->decode_table,
++ XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
++ if (sym < 256) {
++ /* Literal */
++ *out_next++ = sym;
++ } else {
++ /* Match */
++ length = sym & 0xf;
++ log2_offset = (sym >> 4) & 0xf;
++
++ bitstream_ensure_bits(&is, 16);
++
++ offset = ((u32)1 << log2_offset) |
++ bitstream_pop_bits(&is, log2_offset);
++
++ if (length == 0xf) {
++ length += bitstream_read_byte(&is);
++ if (length == 0xf + 0xff)
++ length = bitstream_read_u16(&is);
++ }
++ length += XPRESS_MIN_MATCH_LEN;
++
++ if (offset > (size_t)(out_next - out_begin))
++ goto invalid;
++
++ if (length > (size_t)(out_end - out_next))
++ goto invalid;
++
++ out_next = lz_copy(out_next, length, offset, out_end,
++ XPRESS_MIN_MATCH_LEN);
++ }
++ }
++ return 0;
++
++invalid:
++ return -1;
++}
++
++/*
++ * xpress_free_decompressor - Free an XPRESS decompressor
++ *
++ * @decompressor: A decompressor that was allocated with
++ * xpress_allocate_decompressor(), or NULL.
++ */
++void xpress_free_decompressor(struct xpress_decompressor *decompressor)
++{
++ kfree(decompressor);
++}
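
The match branch above packs two fields into every Huffman symbol >= 256: the low nibble is the base match length and the next nibble is log2 of the offset. A small sketch of that split, with worked numbers in the comment (the sample symbol value is invented for illustration):

/*
 * For sym = 0x2a5: len_base = 0x2a5 & 0xf = 5 and log2_offset = 0xa, so
 * the decoder reads 10 more raw bits and forms offset = (1 << 10) | bits;
 * the final length is 5 + XPRESS_MIN_MATCH_LEN = 8. A len_base of 0xf
 * would instead take the extension-byte / 16-bit length path.
 */
static inline void xpress_split_match_sym(u32 sym, u32 *len_base,
					  u32 *log2_offset)
{
	*len_base = sym & 0xf;
	*log2_offset = (sym >> 4) & 0xf;
}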
+diff --git a/fs/ntfs3/lznt.c b/fs/ntfs3/lznt.c
+new file mode 100644
+index 000000000000..edba953b754a
+--- /dev/null
++++ b/fs/ntfs3/lznt.c
+@@ -0,0 +1,452 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ *
++ * Copyright (C) 2019-2020 Paragon Software GmbH, All rights reserved.
++ *
++ */
++#include <linux/blkdev.h>
++#include <linux/buffer_head.h>
++#include <linux/fs.h>
++#include <linux/nls.h>
++
++#include "debug.h"
++#include "ntfs.h"
++#include "ntfs_fs.h"
++
++// clang-format off
++/* src buffer is all zeros */
++#define LZNT_ERROR_ALL_ZEROS 1
++#define LZNT_CHUNK_SIZE 0x1000
++// clang-format on
++
++struct lznt_hash {
++ const u8 *p1;
++ const u8 *p2;
++};
++
++struct lznt {
++ const u8 *unc;
++ const u8 *unc_end;
++ const u8 *best_match;
++ size_t max_len;
++ bool std;
++
++ struct lznt_hash hash[LZNT_CHUNK_SIZE];
++};
++
++static inline size_t get_match_len(const u8 *ptr, const u8 *end, const u8 *prev,
++ size_t max_len)
++{
++ size_t len = 0;
++
++ while (ptr + len < end && ptr[len] == prev[len] && ++len < max_len)
++ ;
++ return len;
++}
++
++static size_t longest_match_std(const u8 *src, struct lznt *ctx)
++{
++ size_t hash_index;
++ size_t len1 = 0, len2 = 0;
++ const u8 **hash;
++
++ hash_index =
++ ((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) &
++ (LZNT_CHUNK_SIZE - 1);
++
++ hash = &(ctx->hash[hash_index].p1);
++
++ if (hash[0] >= ctx->unc && hash[0] < src && hash[0][0] == src[0] &&
++ hash[0][1] == src[1] && hash[0][2] == src[2]) {
++ len1 = 3;
++ if (ctx->max_len > 3)
++ len1 += get_match_len(src + 3, ctx->unc_end,
++ hash[0] + 3, ctx->max_len - 3);
++ }
++
++ if (hash[1] >= ctx->unc && hash[1] < src && hash[1][0] == src[0] &&
++ hash[1][1] == src[1] && hash[1][2] == src[2]) {
++ len2 = 3;
++ if (ctx->max_len > 3)
++ len2 += get_match_len(src + 3, ctx->unc_end,
++ hash[1] + 3, ctx->max_len - 3);
++ }
++
++ /* Compare two matches and select the best one */
++ if (len1 < len2) {
++ ctx->best_match = hash[1];
++ len1 = len2;
++ } else {
++ ctx->best_match = hash[0];
++ }
++
++ hash[1] = hash[0];
++ hash[0] = src;
++ return len1;
++}
++
++static size_t longest_match_best(const u8 *src, struct lznt *ctx)
++{
++ size_t max_len;
++ const u8 *ptr;
++
++ if (ctx->unc >= src || !ctx->max_len)
++ return 0;
++
++ max_len = 0;
++ for (ptr = ctx->unc; ptr < src; ++ptr) {
++ size_t len =
++ get_match_len(src, ctx->unc_end, ptr, ctx->max_len);
++ if (len >= max_len) {
++ max_len = len;
++ ctx->best_match = ptr;
++ }
++ }
++
++ return max_len >= 3 ? max_len : 0;
++}
++
++static const size_t s_max_len[] = {
++ 0x1002, 0x802, 0x402, 0x202, 0x102, 0x82, 0x42, 0x22, 0x12,
++};
++
++static const size_t s_max_off[] = {
++ 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
++};
++
++static inline u16 make_pair(size_t offset, size_t len, size_t index)
++{
++ return ((offset - 1) << (12 - index)) |
++ ((len - 3) & (((1 << (12 - index)) - 1)));
++}
++
++static inline size_t parse_pair(u16 pair, size_t *offset, size_t index)
++{
++ *offset = 1 + (pair >> (12 - index));
++ return 3 + (pair & ((1 << (12 - index)) - 1));
++}
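++/*
++ * Editorial note (not part of the original patch): the 16-bit pair packs
++ * (4 + index) offset bits above (12 - index) length bits, matching the
++ * s_max_off[] / s_max_len[] tables. For index 0:
++ * make_pair(5, 7, 0) = ((5 - 1) << 12) | (7 - 3) = 0x4004, and
++ * parse_pair(0x4004, &offset, 0) recovers offset = 5, length = 7.
++ */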
++
++/*
++ * compress_chunk
++ *
++ * Returns one of three values:
++ * 0 - ok, 'cmpr' contains 'cmpr_chunk_size' bytes of compressed data
++ * 1 - input buffer is all zeros
++ * -2 - the compressed buffer is too small to hold the compressed data
++ */
++static inline int compress_chunk(size_t (*match)(const u8 *, struct lznt *),
++ const u8 *unc, const u8 *unc_end, u8 *cmpr,
++ u8 *cmpr_end, size_t *cmpr_chunk_size,
++ struct lznt *ctx)
++{
++ size_t cnt = 0;
++ size_t idx = 0;
++ const u8 *up = unc;
++ u8 *cp = cmpr + 3;
++ u8 *cp2 = cmpr + 2;
++ u8 not_zero = 0;
++	/* Control byte: each of its 8 bits flags one token (0 - copy byte as is, 1 - offset/length pair) */
++ u8 ohdr = 0;
++ u8 *last;
++ u16 t16;
++
++ if (unc + LZNT_CHUNK_SIZE < unc_end)
++ unc_end = unc + LZNT_CHUNK_SIZE;
++
++ last = min(cmpr + LZNT_CHUNK_SIZE + sizeof(short), cmpr_end);
++
++ ctx->unc = unc;
++ ctx->unc_end = unc_end;
++ ctx->max_len = s_max_len[0];
++
++ while (up < unc_end) {
++ size_t max_len;
++
++ while (unc + s_max_off[idx] < up)
++ ctx->max_len = s_max_len[++idx];
++
++ // Find match
++ max_len = up + 3 <= unc_end ? (*match)(up, ctx) : 0;
++
++ if (!max_len) {
++ if (cp >= last)
++ goto NotCompressed;
++ not_zero |= *cp++ = *up++;
++ } else if (cp + 1 >= last) {
++ goto NotCompressed;
++ } else {
++ t16 = make_pair(up - ctx->best_match, max_len, idx);
++ *cp++ = t16;
++ *cp++ = t16 >> 8;
++
++ ohdr |= 1 << cnt;
++ up += max_len;
++ }
++
++ cnt = (cnt + 1) & 7;
++ if (!cnt) {
++ *cp2 = ohdr;
++ ohdr = 0;
++ cp2 = cp;
++ cp += 1;
++ }
++ }
++
++ if (cp2 < last)
++ *cp2 = ohdr;
++ else
++ cp -= 1;
++
++ *cmpr_chunk_size = cp - cmpr;
++
++ t16 = (*cmpr_chunk_size - 3) | 0xB000;
++ cmpr[0] = t16;
++ cmpr[1] = t16 >> 8;
++
++ return not_zero ? 0 : LZNT_ERROR_ALL_ZEROS;
++
++NotCompressed:
++
++ if ((cmpr + LZNT_CHUNK_SIZE + sizeof(short)) > last)
++ return -2;
++
++ /*
++	 * Store the chunk as uncompressed data
++ * 0x3FFF == ((LZNT_CHUNK_SIZE + 2 - 3) | 0x3000)
++ */
++ cmpr[0] = 0xff;
++ cmpr[1] = 0x3f;
++
++ memcpy(cmpr + sizeof(short), unc, LZNT_CHUNK_SIZE);
++ *cmpr_chunk_size = LZNT_CHUNK_SIZE + sizeof(short);
++
++ return 0;
++}
++
++static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr,
++ const u8 *cmpr_end)
++{
++ u8 *up = unc;
++ u8 ch = *cmpr++;
++ size_t bit = 0;
++ size_t index = 0;
++ u16 pair;
++ size_t offset, length;
++
++	/* Decompress while both pointers remain inside their buffers */
++ while (up < unc_end && cmpr < cmpr_end) {
++ /* Correct index */
++ while (unc + s_max_off[index] < up)
++ index += 1;
++
++ /* Check the current flag for zero */
++ if (!(ch & (1 << bit))) {
++ /* Just copy byte */
++ *up++ = *cmpr++;
++ goto next;
++ }
++
++ /* Check for boundary */
++ if (cmpr + 1 >= cmpr_end)
++ return -EINVAL;
++
++ /* Read a short from little endian stream */
++ pair = cmpr[1];
++ pair <<= 8;
++ pair |= cmpr[0];
++
++ cmpr += 2;
++
++ /* Translate packed information into offset and length */
++ length = parse_pair(pair, &offset, index);
++
++ /* Check offset for boundary */
++ if (unc + offset > up)
++ return -EINVAL;
++
++ /* Truncate the length if necessary */
++ if (up + length >= unc_end)
++ length = unc_end - up;
++
++		/* Now we copy bytes. This is the heart of the LZ algorithm. */
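++		/*
++		 * Editorial note: the copy is byte-by-byte on purpose, since
++		 * offset may be smaller than length and the match then
++		 * overlaps its own output (offset 1 replicates one byte).
++		 */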
++ for (; length > 0; length--, up++)
++ *up = *(up - offset);
++
++next:
++ /* Advance flag bit value */
++ bit = (bit + 1) & 7;
++
++ if (!bit) {
++ if (cmpr >= cmpr_end)
++ break;
++
++ ch = *cmpr++;
++ }
++ }
++
++ /* return the size of uncompressed data */
++ return up - unc;
++}
++
++/*
++ * 0 - standard compression
++ * !0 - best compression, requires a lot of CPU
++ */
++struct lznt *get_lznt_ctx(int level)
++{
++ struct lznt *r = ntfs_alloc(
++ level ? offsetof(struct lznt, hash) : sizeof(struct lznt), 1);
++
++ if (r)
++ r->std = !level;
++ return r;
++}
++
++/*
++ * compress_lznt
++ *
++ * Compresses "unc" into "cmpr"
++ * +x - ok, 'cmpr' contains 'final_compressed_size' bytes of compressed data
++ * 0 - input buffer is all zeros
++ */
++size_t compress_lznt(const void *unc, size_t unc_size, void *cmpr,
++ size_t cmpr_size, struct lznt *ctx)
++{
++ int err;
++ size_t (*match)(const u8 *src, struct lznt *ctx);
++ u8 *p = cmpr;
++ u8 *end = p + cmpr_size;
++ const u8 *unc_chunk = unc;
++ const u8 *unc_end = unc_chunk + unc_size;
++ bool is_zero = true;
++
++ if (ctx->std) {
++ match = &longest_match_std;
++ memset(ctx->hash, 0, sizeof(ctx->hash));
++ } else {
++ match = &longest_match_best;
++ }
++
++ /* compression cycle */
++ for (; unc_chunk < unc_end; unc_chunk += LZNT_CHUNK_SIZE) {
++ cmpr_size = 0;
++ err = compress_chunk(match, unc_chunk, unc_end, p, end,
++ &cmpr_size, ctx);
++ if (err < 0)
++ return unc_size;
++
++ if (is_zero && err != LZNT_ERROR_ALL_ZEROS)
++ is_zero = false;
++
++ p += cmpr_size;
++ }
++
++ if (p <= end - 2)
++ p[0] = p[1] = 0;
++
++ return is_zero ? 0 : PtrOffset(cmpr, p);
++}
++
++/*
++ * decompress_lznt
++ *
++ * decompresses "cmpr" into "unc"
++ */
++ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc,
++ size_t unc_size)
++{
++ const u8 *cmpr_chunk = cmpr;
++ const u8 *cmpr_end = cmpr_chunk + cmpr_size;
++ u8 *unc_chunk = unc;
++ u8 *unc_end = unc_chunk + unc_size;
++ u16 chunk_hdr;
++
++ if (cmpr_size < sizeof(short))
++ return -EINVAL;
++
++ /* read chunk header */
++ chunk_hdr = cmpr_chunk[1];
++ chunk_hdr <<= 8;
++ chunk_hdr |= cmpr_chunk[0];
++
++ /* loop through decompressing chunks */
++ for (;;) {
++ size_t chunk_size_saved;
++ size_t unc_use;
++ size_t cmpr_use = 3 + (chunk_hdr & (LZNT_CHUNK_SIZE - 1));
++
++ /* Check that the chunk actually fits the supplied buffer */
++ if (cmpr_chunk + cmpr_use > cmpr_end)
++ return -EINVAL;
++
++ /* First make sure the chunk contains compressed data */
++ if (chunk_hdr & 0x8000) {
++ /* Decompress a chunk and return if we get an error */
++ ssize_t err =
++ decompress_chunk(unc_chunk, unc_end,
++ cmpr_chunk + sizeof(chunk_hdr),
++ cmpr_chunk + cmpr_use);
++ if (err < 0)
++ return err;
++ unc_use = err;
++ } else {
++ /* This chunk does not contain compressed data */
++ unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end ?
++ unc_end - unc_chunk :
++ LZNT_CHUNK_SIZE;
++
++ if (cmpr_chunk + sizeof(chunk_hdr) + unc_use >
++ cmpr_end) {
++ return -EINVAL;
++ }
++
++ memcpy(unc_chunk, cmpr_chunk + sizeof(chunk_hdr),
++ unc_use);
++ }
++
++ /* Advance pointers */
++ cmpr_chunk += cmpr_use;
++ unc_chunk += unc_use;
++
++ /* Check for the end of unc buffer */
++ if (unc_chunk >= unc_end)
++ break;
++
++		/* Proceed to the next chunk */
++ if (cmpr_chunk > cmpr_end - 2)
++ break;
++
++ chunk_size_saved = LZNT_CHUNK_SIZE;
++
++ /* read chunk header */
++ chunk_hdr = cmpr_chunk[1];
++ chunk_hdr <<= 8;
++ chunk_hdr |= cmpr_chunk[0];
++
++ if (!chunk_hdr)
++ break;
++
++ /* Check the size of unc buffer */
++ if (unc_use < chunk_size_saved) {
++ size_t t1 = chunk_size_saved - unc_use;
++ u8 *t2 = unc_chunk + t1;
++
++ /* 'Zero' memory */
++ if (t2 >= unc_end)
++ break;
++
++ memset(unc_chunk, 0, t1);
++ unc_chunk = t2;
++ }
++ }
++
++ /* Check compression boundary */
++ if (cmpr_chunk > cmpr_end)
++ return -EINVAL;
++
++ /*
++	 * The uncompressed size is just the difference between the current
++	 * pointer and the original one
++ */
++ return PtrOffset(unc, unc_chunk);
++}
+--
+2.25.4
+
+
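Taken together, get_lznt_ctx() / compress_lznt() / decompress_lznt() form a self-contained codec. A hedged round-trip sketch follows; the ntfs_free() counterpart of ntfs_alloc() is an assumption (its definition is not in this hunk), and the return-value handling follows the comments above:

static ssize_t example_lznt_roundtrip(const u8 *unc, size_t unc_size,
				      u8 *cmpr, size_t cmpr_size,
				      u8 *out, size_t out_size)
{
	struct lznt *ctx = get_lznt_ctx(0);	/* 0 = standard compression */
	size_t c;
	ssize_t ret;

	if (!ctx)
		return -ENOMEM;

	c = compress_lznt(unc, unc_size, cmpr, cmpr_size, ctx);
	if (!c)
		ret = 0;		/* input was all zeros */
	else if (c >= unc_size)
		ret = -EMSGSIZE;	/* error path returns unc_size */
	else
		ret = decompress_lznt(cmpr, c, out, out_size);

	ntfs_free(ctx);		/* assumed to pair with ntfs_alloc() */
	return ret;
}
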
diff --git a/PATCH-v14-07-10-fs-ntfs3-Add-NTFS-journal.patch b/PATCH-v16-07-10-fs-ntfs3-Add-NTFS-journal.patch
similarity index 98%
rename from PATCH-v14-07-10-fs-ntfs3-Add-NTFS-journal.patch
rename to PATCH-v16-07-10-fs-ntfs3-Add-NTFS-journal.patch
index 62d1c99..89ac820 100644
--- a/PATCH-v14-07-10-fs-ntfs3-Add-NTFS-journal.patch
+++ b/PATCH-v16-07-10-fs-ntfs3-Add-NTFS-journal.patch
@@ -5,37 +5,37 @@ X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
X-Spam-Level:
X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,
- INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT
- autolearn=ham autolearn_force=no version=3.4.0
+ INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
+ USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id C65E6C2BB48
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:54 +0000 (UTC)
+ by smtp.lore.kernel.org (Postfix) with ESMTP id 1DEB1C433E9
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:45 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id 7A3BE22C9D
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:54 +0000 (UTC)
+ by mail.kernel.org (Postfix) with ESMTP id D5FF022AAA
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:44 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730934AbgLDPtw (ORCPT
+ id S1729328AbgLYNyk (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:49:52 -0500
-Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:37199 "EHLO
+ Fri, 25 Dec 2020 08:54:40 -0500
+Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:50656 "EHLO
relaydlg-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730727AbgLDPtu (ORCPT
+ by vger.kernel.org with ESMTP id S1726322AbgLYNyi (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:49:50 -0500
+ Fri, 25 Dec 2020 08:54:38 -0500
Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id CF1D582260;
- Fri, 4 Dec 2020 18:48:08 +0300 (MSK)
+ by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id DF238821E5;
+ Fri, 25 Dec 2020 16:53:27 +0300 (MSK)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096888;
+ d=paragon-software.com; s=mail; t=1608904407;
bh=Jb6h+aSbr2Q7XByjhYbQv1GBIcAPMzu95x1r0h6lAt4=;
h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=L7NMLoFgrDmhI4uhrqXFwzPW2zCdvaybw+TinL2Q6AJC8T6EZwBA4qHkolOex1Qr1
- l8PVkjp5kva485tRnS8l3hyC+Fuh/3xGfTpVxdZi9dJHvtRvJLyMEA8CTD9X5KsKaV
- A4IlJ2fJ8fobGA++1ZYgxPGhYG+yyQcm4UxfsoN4=
+ b=RYRPJ5qh5VbU7MqdSCybpFQD7x0dEbSnjKIbBZBXukLzJkBPxKYghE/wMBgNUiBE6
+ Qqo0kXbAf01LW+g0bBPUzXMRPXDMrwJlzW/OxpZk2iYznRH4j2K5uCHvEbWDiE28ST
+ isJ2RWfNq//jfKVQYMcYvZFxYk3jAkkqiknv/Yf0=
Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:07 +0300
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:27 +0300
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
To: <linux-fsdevel@vger.kernel.org>
CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
@@ -45,12 +45,12 @@ CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
<linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
<dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 07/10] fs/ntfs3: Add NTFS journal
-Date: Fri, 4 Dec 2020 18:45:57 +0300
-Message-ID: <20201204154600.1546096-8-almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 07/10] fs/ntfs3: Add NTFS journal
+Date: Fri, 25 Dec 2020 16:51:16 +0300
+Message-ID: <20201225135119.3666763-8-almaz.alexandrovich@paragon-software.com>
X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
@@ -60,7 +60,7 @@ X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-8-almaz.alexandrovich@paragon-software.com/>
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-8-almaz.alexandrovich@paragon-software.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
diff --git a/PATCH-v14-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch b/PATCH-v16-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch
similarity index 80%
rename from PATCH-v14-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch
rename to PATCH-v16-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch
index 1069abc..5179574 100644
--- a/PATCH-v14-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch
+++ b/PATCH-v16-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch
@@ -5,37 +5,37 @@ X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
X-Spam-Level:
X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,
- INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT
- autolearn=ham autolearn_force=no version=3.4.0
+ INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
+ USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id BEE14C433FE
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:48:51 +0000 (UTC)
+ by smtp.lore.kernel.org (Postfix) with ESMTP id A2828C43381
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:55:12 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id 678E822B45
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:48:51 +0000 (UTC)
+ by mail.kernel.org (Postfix) with ESMTP id 7F72223139
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:55:12 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730746AbgLDPsu (ORCPT
+ id S1729194AbgLYNyx (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:48:50 -0500
-Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:37139 "EHLO
+ Fri, 25 Dec 2020 08:54:53 -0500
+Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:50668 "EHLO
relaydlg-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730726AbgLDPsu (ORCPT
+ by vger.kernel.org with ESMTP id S1726330AbgLYNyL (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:48:50 -0500
+ Fri, 25 Dec 2020 08:54:11 -0500
Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id 48E358225E;
- Fri, 4 Dec 2020 18:48:08 +0300 (MSK)
+ by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id 0DED3821EA;
+ Fri, 25 Dec 2020 16:53:28 +0300 (MSK)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096888;
- bh=dQx82N1l3xxVYv2e6MTaG38j7qLgnkIAsCejNitBUpc=;
+ d=paragon-software.com; s=mail; t=1608904408;
+ bh=Rcj18t1V+4cKUqwx4dhyTMZ1FYj8KVbuPQgwv+4Z0UY=;
h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=svoPyTjyqy1dTR4tEjZVNxfWSFG5VRAchYhM8GFTRgYRVWsaVHFJ06B/dPvLewD9C
- M7oebZqJJI0kBysJzwS1s0RStwLJRR6FhMD05qImn4DhwaPvIhVmesFC/7v6LSGG9q
- t0KkNFbs5/yDMMJ8pSXEFpezSZ+Xj4o9Wiqe6+uU=
+ b=LFfUIojt+FD199EJCP0Ckzl8dZyWxC+iOOqpMCXce/XLKixBylvBI1vzzUSoedxaD
+ BDV//vuiy6t1gMWZGDn5tqhLZrPwoPs2CAWXoQyPnUGMhtqxP2jFVlgII+aKmUrcTF
+ OuT9JhxISkTm4QvumFHu/TJTEVDMmzBulfFzVnUA=
Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:07 +0300
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:27 +0300
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
To: <linux-fsdevel@vger.kernel.org>
CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
@@ -45,12 +45,12 @@ CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
<linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
<dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 08/10] fs/ntfs3: Add Kconfig, Makefile and doc
-Date: Fri, 4 Dec 2020 18:45:58 +0300
-Message-ID: <20201204154600.1546096-9-almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 08/10] fs/ntfs3: Add Kconfig, Makefile and doc
+Date: Fri, 25 Dec 2020 16:51:17 +0300
+Message-ID: <20201225135119.3666763-9-almaz.alexandrovich@paragon-software.com>
X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
@@ -60,7 +60,7 @@ X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-9-almaz.alexandrovich@paragon-software.com/>
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-9-almaz.alexandrovich@paragon-software.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
@@ -69,9 +69,9 @@ This adds Kconfig, Makefile and doc
Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
---
Documentation/filesystems/ntfs3.rst | 107 ++++++++++++++++++++++++++++
- fs/ntfs3/Kconfig | 31 ++++++++
- fs/ntfs3/Makefile | 32 +++++++++
- 3 files changed, 170 insertions(+)
+ fs/ntfs3/Kconfig | 41 +++++++++++
+ fs/ntfs3/Makefile | 31 ++++++++
+ 3 files changed, 179 insertions(+)
create mode 100644 Documentation/filesystems/ntfs3.rst
create mode 100644 fs/ntfs3/Kconfig
create mode 100644 fs/ntfs3/Makefile
@@ -191,10 +191,10 @@ index 000000000000..fb29067360cc
+
diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig
new file mode 100644
-index 000000000000..b446651042af
+index 000000000000..f9b732f4a5a0
--- /dev/null
+++ b/fs/ntfs3/Kconfig
-@@ -0,0 +1,31 @@
+@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config NTFS3_FS
+ tristate "NTFS Read-Write file system support"
@@ -226,12 +226,22 @@ index 000000000000..b446651042af
+ In Windows 10 one can use command "compact" to compress any files.
+ 4 possible variants of compression are: xpress4k, xpress8k, xpress16 and lzx.
+ To read such "compacted" files say Y here.
++
++config NTFS3_POSIX_ACL
++ bool "NTFS POSIX Access Control Lists"
++ depends on NTFS3_FS
++ select FS_POSIX_ACL
++ help
++ POSIX Access Control Lists (ACLs) support additional access rights
++ for users and groups beyond the standard owner/group/world scheme,
++ and this option selects support for ACLs specifically for ntfs
++ filesystems.
diff --git a/fs/ntfs3/Makefile b/fs/ntfs3/Makefile
new file mode 100644
-index 000000000000..60151bea5286
+index 000000000000..b9aacc061781
--- /dev/null
+++ b/fs/ntfs3/Makefile
-@@ -0,0 +1,32 @@
+@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the ntfs3 filesystem support.
@@ -260,7 +270,6 @@ index 000000000000..60151bea5286
+
+ntfs3-$(CONFIG_NTFS3_LZX_XPRESS) += $(addprefix lib/,\
+ decompress_common.o \
-+ lzx_common.o \
+ lzx_decompress.o \
+ xpress_decompress.o \
+ )
diff --git a/PATCH-v14-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch b/PATCH-v16-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch
similarity index 68%
rename from PATCH-v14-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch
rename to PATCH-v16-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch
index 45464a1..2f27a06 100644
--- a/PATCH-v14-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch
+++ b/PATCH-v16-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch
@@ -6,36 +6,36 @@ X-Spam-Level:
X-Spam-Status: No, score=-18.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,
INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,
- USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0
+ USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id F410EC433FE
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:52 +0000 (UTC)
+ by smtp.lore.kernel.org (Postfix) with ESMTP id DFB3BC433E9
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:25 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id B9E9022C9D
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:52 +0000 (UTC)
+ by mail.kernel.org (Postfix) with ESMTP id 9FD2022AAA
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:54:25 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730893AbgLDPt2 (ORCPT
+ id S1727358AbgLYNyP (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:49:28 -0500
-Received: from relayfre-01.paragon-software.com ([176.12.100.13]:55286 "EHLO
- relayfre-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730727AbgLDPsv (ORCPT
+ Fri, 25 Dec 2020 08:54:15 -0500
+Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:50680 "EHLO
+ relaydlg-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
+ by vger.kernel.org with ESMTP id S1726345AbgLYNyK (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:48:51 -0500
+ Fri, 25 Dec 2020 08:54:10 -0500
Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relayfre-01.paragon-software.com (Postfix) with ESMTPS id 95FFA1D47;
- Fri, 4 Dec 2020 18:48:08 +0300 (MSK)
+ by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id 1AE9582200;
+ Fri, 25 Dec 2020 16:53:28 +0300 (MSK)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096888;
+ d=paragon-software.com; s=mail; t=1608904408;
bh=ryC1O+KjPCuBYVk17xa91BOxLPmINLD4FPXxdPXSapU=;
h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=PP6/eDhU8d4w6CoOPhsTiBYgdVwKq0Y3tIy7vMftv1xXYhjtfn2WuBMgGQ5LOTDDE
- HWddeM3W8t5XZ1D5Vflh/Q5JA+ZxSNgEsjmm1efJD9YB937NUDAWRsZhEDLkMudT1V
- ZlQPVdD8D8v8OVErUquEUoC0QkpUPWmtiu1Ui+v0=
+ b=KNmBbjVieuPpHeraY1gUG6OkAuHgsrNxemooPI8NtWWvN9IipvBFAT0a+rPVljvKd
+ u61If37WP6QLSo5o8lt4FDWDR/TAVT3JWLbrSO22ORJEaeqs1HOEVtc4271XbPJwgL
+ 6F1Mp0qqytIRPMZgdZOkJ1RYL6YHFZ6U1MePC78A=
Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:08 +0300
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:27 +0300
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
To: <linux-fsdevel@vger.kernel.org>
CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
@@ -45,12 +45,12 @@ CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
<linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
<dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 09/10] fs/ntfs3: Add NTFS3 in fs/Kconfig and fs/Makefile
-Date: Fri, 4 Dec 2020 18:45:59 +0300
-Message-ID: <20201204154600.1546096-10-almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 09/10] fs/ntfs3: Add NTFS3 in fs/Kconfig and fs/Makefile
+Date: Fri, 25 Dec 2020 16:51:18 +0300
+Message-ID: <20201225135119.3666763-10-almaz.alexandrovich@paragon-software.com>
X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
@@ -60,7 +60,7 @@ X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-10-almaz.alexandrovich@paragon-software.com/>
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-10-almaz.alexandrovich@paragon-software.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
diff --git a/PATCH-v14-10-10-fs-ntfs3-Add-MAINTAINERS.patch b/PATCH-v16-10-10-fs-ntfs3-Add-MAINTAINERS.patch
similarity index 63%
rename from PATCH-v14-10-10-fs-ntfs3-Add-MAINTAINERS.patch
rename to PATCH-v16-10-10-fs-ntfs3-Add-MAINTAINERS.patch
index 71f6278..4df70e6 100644
--- a/PATCH-v14-10-10-fs-ntfs3-Add-MAINTAINERS.patch
+++ b/PATCH-v16-10-10-fs-ntfs3-Add-MAINTAINERS.patch
@@ -6,36 +6,37 @@ X-Spam-Level:
X-Spam-Status: No, score=-23.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,
INCLUDES_PATCH,MAILING_LIST_MULTI,MENTIONS_GIT_HOSTING,SPF_HELO_NONE,SPF_PASS,
- URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0
+ URIBL_BLOCKED,USER_AGENT_GIT autolearn=unavailable autolearn_force=no
+ version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
- by smtp.lore.kernel.org (Postfix) with ESMTP id 738F5C4361A
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:25 +0000 (UTC)
+ by smtp.lore.kernel.org (Postfix) with ESMTP id B0306C4332B
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:55:12 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
- by mail.kernel.org (Postfix) with ESMTP id 4C59422C9C
- for <linux-kernel@archiver.kernel.org>; Fri, 4 Dec 2020 15:49:25 +0000 (UTC)
+ by mail.kernel.org (Postfix) with ESMTP id 967B82313F
+ for <linux-kernel@archiver.kernel.org>; Fri, 25 Dec 2020 13:55:12 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
- id S1730769AbgLDPsw (ORCPT
+ id S1729423AbgLYNy7 (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
- Fri, 4 Dec 2020 10:48:52 -0500
-Received: from relaydlg-01.paragon-software.com ([81.5.88.159]:37152 "EHLO
- relaydlg-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
- by vger.kernel.org with ESMTP id S1730728AbgLDPsv (ORCPT
+ Fri, 25 Dec 2020 08:54:59 -0500
+Received: from relayfre-01.paragon-software.com ([176.12.100.13]:47354 "EHLO
+ relayfre-01.paragon-software.com" rhost-flags-OK-OK-OK-OK)
+ by vger.kernel.org with ESMTP id S1726322AbgLYNyv (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
- Fri, 4 Dec 2020 10:48:51 -0500
+ Fri, 25 Dec 2020 08:54:51 -0500
Received: from dlg2.mail.paragon-software.com (vdlg-exch-02.paragon-software.com [172.30.1.105])
- by relaydlg-01.paragon-software.com (Postfix) with ESMTPS id C11ED8225F;
- Fri, 4 Dec 2020 18:48:08 +0300 (MSK)
+ by relayfre-01.paragon-software.com (Postfix) with ESMTPS id 554E11D74;
+ Fri, 25 Dec 2020 16:53:28 +0300 (MSK)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=paragon-software.com; s=mail; t=1607096888;
- bh=pS9OQ0KZ2/ghyosrgOW3lEVOpVqhnUxyyI5C43PX4LY=;
+ d=paragon-software.com; s=mail; t=1608904408;
+ bh=5edRUcHcrLDkIb9EPFaEZwgjmGaBWS5lAiZUoMbv+LY=;
h=From:To:CC:Subject:Date:In-Reply-To:References;
- b=LzyfrI2r4mkNsf0DoeKgXYF7eAG1qupIRFvipvJb0haO3Zf5zacmixitwj1g0rbaa
- DGIilAeYYpWS4EV6EAvcL/q468JqCkeVhxI3so2uTG5OwjE7XdlicU5HvBYv2cVUEr
- 8b8+5VPcqnr6rQnoqcKML2QFb0GsghwIgU9DCIRY=
+ b=q/mEawAf+TMQVd2AtMO+e+IXI+iHdgiMQOaoFfBlgXhS+7z7llmsOAwHnsexQ9TWH
+ q92Oh527oiuCFW1wNCmnQYZI9K85gCUAM/mpmGJ9e9DROW9ldDAXz94FBxAQdE1pIG
+ YqUAtwMsnWPdBWgNvHKxWu0SMTA7IJbygqukfy04=
Received: from fsd-lkpg.ufsd.paragon-software.com (172.30.114.105) by
vdlg-exch-02.paragon-software.com (172.30.1.105) with Microsoft SMTP Server
(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
- 15.1.1847.3; Fri, 4 Dec 2020 18:48:08 +0300
+ 15.1.1847.3; Fri, 25 Dec 2020 16:53:27 +0300
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
To: <linux-fsdevel@vger.kernel.org>
CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
@@ -45,12 +46,12 @@ CC: <viro@zeniv.linux.org.uk>, <linux-kernel@vger.kernel.org>,
<linux-ntfs-dev@lists.sourceforge.net>, <anton@tuxera.com>,
<dan.carpenter@oracle.com>, <hch@lst.de>, <ebiggers@kernel.org>,
Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-Subject: [PATCH v14 10/10] fs/ntfs3: Add MAINTAINERS
-Date: Fri, 4 Dec 2020 18:46:00 +0300
-Message-ID: <20201204154600.1546096-11-almaz.alexandrovich@paragon-software.com>
+Subject: [PATCH v16 10/10] fs/ntfs3: Add MAINTAINERS
+Date: Fri, 25 Dec 2020 16:51:19 +0300
+Message-ID: <20201225135119.3666763-11-almaz.alexandrovich@paragon-software.com>
X-Mailer: git-send-email 2.25.4
-In-Reply-To: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
-References: <20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com>
+In-Reply-To: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
+References: <20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
@@ -60,7 +61,7 @@ X-ClientProxiedBy: vdlg-exch-02.paragon-software.com (172.30.1.105) To
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
-Archived-At: <https://lore.kernel.org/lkml/20201204154600.1546096-11-almaz.alexandrovich@paragon-software.com/>
+Archived-At: <https://lore.kernel.org/lkml/20201225135119.3666763-11-almaz.alexandrovich@paragon-software.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
@@ -72,10 +73,10 @@ Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
1 file changed, 7 insertions(+)
diff --git a/MAINTAINERS b/MAINTAINERS
-index ebe4829cdd4d..82f06a99b7f0 100644
+index 32944ecc5733..5260e1939798 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
-@@ -12470,6 +12470,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/aia21/ntfs.git
+@@ -12674,6 +12674,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/aia21/ntfs.git
F: Documentation/filesystems/ntfs.rst
F: fs/ntfs/
diff --git a/aarch64-desktop-omv-defconfig b/aarch64-desktop-omv-defconfig
index aa1d2ea..ba02a98 100644
--- a/aarch64-desktop-omv-defconfig
+++ b/aarch64-desktop-omv-defconfig
@@ -1026,6 +1026,8 @@ CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_BOUNCE=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+# CONFIG_UKSM is not set
+CONFIG_KSM_LEGACY=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_MEMORY_FAILURE=y
@@ -9597,6 +9599,7 @@ CONFIG_NTFS_FS=m
# CONFIG_NTFS_RW is not set
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/aarch64-server-omv-defconfig b/aarch64-server-omv-defconfig
index 8745443..7112a0c 100644
--- a/aarch64-server-omv-defconfig
+++ b/aarch64-server-omv-defconfig
@@ -1026,6 +1026,8 @@ CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_BOUNCE=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+# CONFIG_UKSM is not set
+CONFIG_KSM_LEGACY=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_MEMORY_FAILURE=y
@@ -9597,6 +9599,7 @@ CONFIG_NTFS_FS=m
# CONFIG_NTFS_RW is not set
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/armv7hnl-desktop-omv-defconfig b/armv7hnl-desktop-omv-defconfig
index 2727e5c..901ca4d 100644
--- a/armv7hnl-desktop-omv-defconfig
+++ b/armv7hnl-desktop-omv-defconfig
@@ -1090,6 +1090,8 @@ CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_BOUNCE=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+# CONFIG_UKSM is not set
+CONFIG_KSM_LEGACY=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
# CONFIG_TRANSPARENT_HUGEPAGE is not set
CONFIG_CLEANCACHE=y
@@ -10736,6 +10738,7 @@ CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/armv7hnl-server-omv-defconfig b/armv7hnl-server-omv-defconfig
index bde81a1..512597b 100644
--- a/armv7hnl-server-omv-defconfig
+++ b/armv7hnl-server-omv-defconfig
@@ -1090,6 +1090,8 @@ CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_BOUNCE=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+# CONFIG_UKSM is not set
+CONFIG_KSM_LEGACY=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
# CONFIG_TRANSPARENT_HUGEPAGE is not set
CONFIG_CLEANCACHE=y
@@ -10736,6 +10738,7 @@ CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/i686-desktop-gcc-omv-defconfig b/i686-desktop-gcc-omv-defconfig
index 2b40a45..a496c17 100644
--- a/i686-desktop-gcc-omv-defconfig
+++ b/i686-desktop-gcc-omv-defconfig
@@ -1005,6 +1005,8 @@ CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+CONFIG_UKSM=y
+# CONFIG_KSM_LEGACY is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
@@ -9622,6 +9624,7 @@ CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/i686-server-gcc-omv-defconfig b/i686-server-gcc-omv-defconfig
index a41ac2f..f034eaf 100644
--- a/i686-server-gcc-omv-defconfig
+++ b/i686-server-gcc-omv-defconfig
@@ -1006,6 +1006,8 @@ CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+CONFIG_UKSM=y
+# CONFIG_KSM_LEGACY is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
@@ -9621,6 +9623,7 @@ CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/kernel-release.spec b/kernel-release.spec
index 4c2cf88..de36506 100644
--- a/kernel-release.spec
+++ b/kernel-release.spec
@@ -22,7 +22,7 @@
# compose tar.xz name and release
%define kernelversion 5
%define patchlevel 10
-%define sublevel 3
+%define sublevel 4
%define relc %{nil}
# Only ever wrong on x.0 releases...
%define previous %{kernelversion}.%(echo $((%{patchlevel}-1)))
@@ -70,7 +70,7 @@
%bcond_with build_doc
# UKSM disabled for 5.10-rc as it needs rebasing
%ifarch %{ix86} %{x86_64}
-%bcond_with uksm
+%bcond_without uksm
%else
%bcond_with uksm
%endif
@@ -273,8 +273,8 @@ Patch41: workaround-aarch64-module-loader.patch
# http://kerneldedup.org/en/projects/uksm/download/
# sources can be found here https://github.com/dolohow/uksm
%if %{with uksm}
-# brokes armx builds
-Patch42: https://raw.githubusercontent.com/dolohow/uksm/master/v5.x/uksm-5.9.patch
+# breaks armx builds
+Patch42: https://raw.githubusercontent.com/dolohow/uksm/master/v5.x/uksm-5.10.patch
%endif
# (crazy) see: https://forum.openmandriva.org/t/nvme-ssd-m2-not-seen-by-omlx-4-0/2407
@@ -333,35 +333,39 @@ Patch211: https://github.com/sirlucjan/kernel-patches/blob/master/5.2/cpu-patche
Patch212: https://salsa.debian.org/kernel-team/linux/raw/master/debian/patches/debian/android-enable-building-ashmem-and-binder-as-modules.patch
Patch213: https://salsa.debian.org/kernel-team/linux/raw/master/debian/patches/debian/export-symbols-needed-by-android-drivers.patch
+# https://gitweb.frugalware.org/frugalware-current/commit/bc3e827af39a321efd770ba4f4de63bca2853471
+Patch214: https://gitweb.frugalware.org/frugalware-current/raw/master/source/base/kernel/nvme-Patriot_Viper_VPN100-QUIRK_IGNORE_DEV_SUBNQN.patch
+
# AMDGPU fixes
# https://patchwork.freedesktop.org/patch/408230/
Patch220: 8353d30e747f-drm-amd-display-disable-stream-if-pixel-clock-changed-with-link-active.patch
# k10temp fixes
-Patch221: https://gitweb.frugalware.org/frugalware-current/raw/2fe3eaa10ecbeb59db965230a1d1aa0a775f6b5a/source/base/kernel/k10temp-fix-ZEN2-desktop-add-ZEN3-desktop.patch
+Patch221: https://gitweb.frugalware.org/frugalware-current/raw/master/source/base/kernel/0001-Revert-hwmon-k10temp-Remove-support-for-displaying-v.patch
+Patch222: https://gitweb.frugalware.org/frugalware-current/raw/2fe3eaa10ecbeb59db965230a1d1aa0a775f6b5a/source/base/kernel/k10temp-fix-ZEN2-desktop-add-ZEN3-desktop.patch
# Backported extra AMD drivers
-Patch222: https://gitweb.frugalware.org/frugalware-current/raw/e4ce7d381051c513cf9ba5443b255534d48ce90a/source/base/kernel/add-amd-sfh-hid_driver.patch
-Patch223: https://gitweb.frugalware.org/frugalware-current/raw/e4ce7d381051c513cf9ba5443b255534d48ce90a/source/base/kernel/add-sbtsi_driver.patch
-Patch224: https://gitweb.frugalware.org/frugalware-current/raw/9feb87fc5d15fc0b31f5e0cfa2bab188c4e6575a/source/base/kernel/enable-new-amd-energy-driver-for-all-ryzen.patch
+Patch223: https://gitweb.frugalware.org/frugalware-current/raw/e4ce7d381051c513cf9ba5443b255534d48ce90a/source/base/kernel/add-amd-sfh-hid_driver.patch
+Patch224: https://gitweb.frugalware.org/frugalware-current/raw/e4ce7d381051c513cf9ba5443b255534d48ce90a/source/base/kernel/add-sbtsi_driver.patch
+Patch225: https://gitweb.frugalware.org/frugalware-current/raw/9feb87fc5d15fc0b31f5e0cfa2bab188c4e6575a/source/base/kernel/enable-new-amd-energy-driver-for-all-ryzen.patch
# Fix CPU frequency governor mess caused by recent Intel patches
-Patch225: https://gitweb.frugalware.org/frugalware-current/raw/50690405717979871bb17b8e6b553799a203c6ae/source/base/kernel/0001-Revert-cpufreq-Avoid-configuring-old-governors-as-de.patch
-Patch226: https://gitweb.frugalware.org/frugalware-current/raw/50690405717979871bb17b8e6b553799a203c6ae/source/base/kernel/revert-parts-of-a00ec3874e7d326ab2dffbed92faddf6a77a84e9-no-Intel-NO.patch
+Patch226: https://gitweb.frugalware.org/frugalware-current/raw/50690405717979871bb17b8e6b553799a203c6ae/source/base/kernel/0001-Revert-cpufreq-Avoid-configuring-old-governors-as-de.patch
+Patch227: https://gitweb.frugalware.org/frugalware-current/raw/50690405717979871bb17b8e6b553799a203c6ae/source/base/kernel/revert-parts-of-a00ec3874e7d326ab2dffbed92faddf6a77a84e9-no-Intel-NO.patch
# NTFS kernel patches
-# https://lore.kernel.org/lkml/20201204154600.1546096-1-almaz.alexandrovich@paragon-software.com/
-Patch300: PATCH-v14-01-10-fs-ntfs3-Add-headers-and-misc-files.patch
-Patch301: PATCH-v14-02-10-fs-ntfs3-Add-initialization-of-super-block.patch
-Patch302: PATCH-v14-03-10-fs-ntfs3-Add-bitmap.patch
-Patch303: PATCH-v14-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch
-Patch304: PATCH-v14-05-10-fs-ntfs3-Add-attrib-operations.patch
-Patch305: PATCH-v14-06-10-fs-ntfs3-Add-compression.patch
-Patch306: PATCH-v14-07-10-fs-ntfs3-Add-NTFS-journal.patch
-Patch307: PATCH-v14-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch
-Patch308: PATCH-v14-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch
-Patch309: PATCH-v14-10-10-fs-ntfs3-Add-MAINTAINERS.patch
+# https://lore.kernel.org/lkml/20201225135119.3666763-1-almaz.alexandrovich@paragon-software.com/
+Patch300: PATCH-v16-01-10-fs-ntfs3-Add-headers-and-misc-files.patch
+Patch301: PATCH-v16-02-10-fs-ntfs3-Add-initialization-of-super-block.patch
+Patch302: PATCH-v16-03-10-fs-ntfs3-Add-bitmap.patch
+Patch303: PATCH-v16-04-10-fs-ntfs3-Add-file-operations-and-implementation.patch
+Patch304: PATCH-v16-05-10-fs-ntfs3-Add-attrib-operations.patch
+Patch305: PATCH-v16-06-10-fs-ntfs3-Add-compression.patch
+Patch306: PATCH-v16-07-10-fs-ntfs3-Add-NTFS-journal.patch
+Patch307: PATCH-v16-08-10-fs-ntfs3-Add-Kconfig-Makefile-and-doc.patch
+Patch308: PATCH-v16-09-10-fs-ntfs3-Add-NTFS3-in-fs-Kconfig-and-fs-Makefile.patch
+Patch309: PATCH-v16-10-10-fs-ntfs3-Add-MAINTAINERS.patch
# Patches to external modules
# Marked SourceXXX instead of PatchXXX because the modules
diff --git a/nvme-Patriot_Viper_VPN100-QUIRK_IGNORE_DEV_SUBNQN.patch b/nvme-Patriot_Viper_VPN100-QUIRK_IGNORE_DEV_SUBNQN.patch
new file mode 100644
index 0000000..1f3c5c7
--- /dev/null
+++ b/nvme-Patriot_Viper_VPN100-QUIRK_IGNORE_DEV_SUBNQN.patch
@@ -0,0 +1,13 @@
+diff -Naur linux-5.10.4/drivers/nvme/host/pci.c linux-5.10.4-p/drivers/nvme/host/pci.c
+--- linux-5.10.4/drivers/nvme/host/pci.c 2020-12-30 11:54:29.000000000 +0100
++++ linux-5.10.4-p/drivers/nvme/host/pci.c 2021-01-01 02:02:04.011358821 +0100
+@@ -3224,7 +3224,8 @@
+ .driver_data = NVME_QUIRK_SINGLE_VECTOR |
+ NVME_QUIRK_128_BYTES_SQES |
+ NVME_QUIRK_SHARED_TAGS },
+-
++ { PCI_DEVICE(0x1987, 0x5012), /* Patriot Viper VPN100 */
++ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
+ { 0, }
+ };
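For context, NVME_QUIRK_IGNORE_DEV_SUBNQN makes the NVMe core disregard the subsystem NQN the device reports (these drives reportedly ship a non-unique one, which confuses multi-device setups). The added entry uses the stock PCI_DEVICE() match macro; an illustrative, abbreviated form of the table (the real one lives in drivers/nvme/host/pci.c, and the quirk flag comes from the driver-internal nvme.h):

#include <linux/pci.h>

static const struct pci_device_id example_nvme_ids[] = {
	{ PCI_DEVICE(0x1987, 0x5012),	/* Patriot Viper VPN100 */
	  .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ 0, }
};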
diff --git a/uksm-5.10.patch b/uksm-5.10.patch
new file mode 100644
index 0000000..eacd7b6
--- /dev/null
+++ b/uksm-5.10.patch
@@ -0,0 +1,6935 @@
+diff --git a/Documentation/vm/uksm.txt b/Documentation/vm/uksm.txt
+new file mode 100644
+index 000000000000..be19a3127001
+--- /dev/null
++++ b/Documentation/vm/uksm.txt
+@@ -0,0 +1,61 @@
++The Ultra Kernel Samepage Merging feature
++----------------------------------------------
++/*
++ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
++ *
++ * This is an improvement upon KSM. Some basic data structures and routines
++ * are borrowed from ksm.c .
++ *
++ * Its new features:
++ * 1. Full system scan:
++ * It automatically scans all user processes' anonymous VMAs. Kernel-user
++ * interaction to submit a memory area to KSM is no longer needed.
++ *
++ * 2. Rich area detection:
++ * It automatically detects rich areas containing abundant duplicated
++ * pages. Rich areas are given full scan speed. Poor areas are
++ * sampled at a reasonable speed with very low CPU consumption.
++ *
++ * 3. Ultra Per-page scan speed improvement:
++ * A new hash algorithm is proposed. As a result, on a machine with
++ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHz DDR2 main memory, it
++ * can scan memory areas that do not contain duplicated pages at a speed of
++ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at a speed of
++ * 477MB/sec ~ 923MB/sec.
++ *
++ * 4. Thrashing area avoidance:
++ * A thrashing area (a VMA that has frequent KSM page break-outs) can be
++ * filtered out. My benchmark shows it's more efficient than KSM's per-page
++ * hash value based volatile page detection.
++ *
++ *
++ * 5. Misc changes upon KSM:
++ * * It has a fully x86-optimized memcmp dedicated to 4-byte-aligned page
++ * comparison. It's much faster than the default C version on x86.
++ * * rmap_item now has a struct page *member to loosely cache an
++ * address-->page mapping, which avoids many calls to the costly
++ * follow_page().
++ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
++ * * try_to_merge_two_pages() can now revert a pte if it fails. No break_
++ * ksm is needed for this case.
++ *
++ * 6. Full Zero Page consideration (contributed by Figo Zhang)
++ * Now uksmd considers full zero pages as special pages and merges them into
++ * a special unswappable uksm zero page.
++ */
++
++ChangeLog:
++
++2012-05-05 The creation of this Doc
++2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up.
++2012-05-28 UKSM 0.1.1.2 bug fix release
++2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2
++2012-07-02 UKSM 0.1.2-beta2
++2012-07-10 UKSM 0.1.2-beta3
++2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization.
++2012-10-13 UKSM 0.1.2.1 Bug fixes.
++2012-12-31 UKSM 0.1.2.2 Minor bug fixes.
++2014-07-02 UKSM 0.1.2.3 Fix a " __this_cpu_read() in preemptible bug".
++2015-04-22 UKSM 0.1.2.4 Fix a race condition that can sometimes trigger annoying warnings.
++2016-09-10 UKSM 0.1.2.5 Fix a bug in dedup ratio calculation.
++2017-02-26 UKSM 0.1.2.6 Fix a bug in hugetlbpage handling and a race bug with page migration.
+diff --git a/fs/exec.c b/fs/exec.c
+index 547a2390baf5..fc64a20db6bd 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -64,6 +64,7 @@
+ #include <linux/compat.h>
+ #include <linux/vmalloc.h>
+ #include <linux/io_uring.h>
++#include <linux/ksm.h>
+
+ #include <linux/uaccess.h>
+ #include <asm/mmu_context.h>
+diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
+index 887a5532e449..581a6762868e 100644
+--- a/fs/proc/meminfo.c
++++ b/fs/proc/meminfo.c
+@@ -108,7 +108,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
+ #endif
+ show_val_kb(m, "PageTables: ",
+ global_zone_page_state(NR_PAGETABLE));
+-
++#ifdef CONFIG_UKSM
++ show_val_kb(m, "KsmZeroPages: ",
++ global_zone_page_state(NR_UKSM_ZERO_PAGES));
++#endif
+ show_val_kb(m, "NFS_Unstable: ", 0);
+ show_val_kb(m, "Bounce: ",
+ global_zone_page_state(NR_BOUNCE));
+diff --git a/include/linux/ksm.h b/include/linux/ksm.h
+index 161e8164abcf..f0dbdf3c986a 100644
+--- a/include/linux/ksm.h
++++ b/include/linux/ksm.h
+@@ -21,20 +21,16 @@ struct mem_cgroup;
+ #ifdef CONFIG_KSM
+ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, int advice, unsigned long *vm_flags);
+-int __ksm_enter(struct mm_struct *mm);
+-void __ksm_exit(struct mm_struct *mm);
+
+-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
++static inline struct stable_node *page_stable_node(struct page *page)
+ {
+- if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
+- return __ksm_enter(mm);
+- return 0;
++ return PageKsm(page) ? page_rmapping(page) : NULL;
+ }
+
+-static inline void ksm_exit(struct mm_struct *mm)
++static inline void set_page_stable_node(struct page *page,
++ struct stable_node *stable_node)
+ {
+- if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+- __ksm_exit(mm);
++ page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
+ }
+
+ /*
+@@ -54,6 +50,33 @@ struct page *ksm_might_need_to_copy(struct page *page,
+ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
+ void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+
++#ifdef CONFIG_KSM_LEGACY
++int __ksm_enter(struct mm_struct *mm);
++void __ksm_exit(struct mm_struct *mm);
++static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
++{
++ if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
++ return __ksm_enter(mm);
++ return 0;
++}
++
++static inline void ksm_exit(struct mm_struct *mm)
++{
++ if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
++ __ksm_exit(mm);
++}
++
++#elif defined(CONFIG_UKSM)
++static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
++{
++ return 0;
++}
++
++static inline void ksm_exit(struct mm_struct *mm)
++{
++}
++#endif /* !CONFIG_UKSM */
++
+ #else /* !CONFIG_KSM */
+
+ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+@@ -89,4 +112,6 @@ static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+ #endif /* CONFIG_MMU */
+ #endif /* !CONFIG_KSM */
+
++#include <linux/uksm.h>
++
+ #endif /* __LINUX_KSM_H */
+diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
+index 5a9238f6caad..5dd1ccf5cb69 100644
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -371,6 +371,9 @@ struct vm_area_struct {
+ struct mempolicy *vm_policy; /* NUMA policy for the VMA */
+ #endif
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
++#ifdef CONFIG_UKSM
++ struct vma_slot *uksm_vma_slot;
++#endif
+ } __randomize_layout;
+
+ struct core_thread {
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index fb3bf696c05e..e4477c3a9a4b 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -159,6 +159,9 @@ enum zone_stat_item {
+ NR_ZSPAGES, /* allocated in zsmalloc */
+ #endif
+ NR_FREE_CMA_PAGES,
++#ifdef CONFIG_UKSM
++ NR_UKSM_ZERO_PAGES,
++#endif
+ NR_VM_ZONE_STAT_ITEMS };
+
+ enum node_stat_item {
+diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
+index e237004d498d..092cdfb7090b 100644
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -1060,12 +1060,25 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ extern void untrack_pfn_moved(struct vm_area_struct *vma);
+ #endif
+
++#ifdef CONFIG_UKSM
++static inline int is_uksm_zero_pfn(unsigned long pfn)
++{
++ extern unsigned long uksm_zero_pfn;
++ return pfn == uksm_zero_pfn;
++}
++#else
++static inline int is_uksm_zero_pfn(unsigned long pfn)
++{
++ return 0;
++}
++#endif
++
+ #ifdef __HAVE_COLOR_ZERO_PAGE
+ static inline int is_zero_pfn(unsigned long pfn)
+ {
+ extern unsigned long zero_pfn;
+ unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+- return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
++ return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT) || is_uksm_zero_pfn(pfn);
+ }
+
+ #define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr))
+@@ -1074,7 +1087,7 @@ static inline int is_zero_pfn(unsigned long pfn)
+ static inline int is_zero_pfn(unsigned long pfn)
+ {
+ extern unsigned long zero_pfn;
+- return pfn == zero_pfn;
++ return (pfn == zero_pfn) || (is_uksm_zero_pfn(pfn));
+ }
+
+ static inline unsigned long my_zero_pfn(unsigned long addr)
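
The two hunks above widen both variants of is_zero_pfn() so that the UKSM zero page is treated like the kernel's empty zero page. A minimal sketch of the resulting check for the non-__HAVE_COLOR_ZERO_PAGE case (names as in the patch, folded together here purely for illustration):

	/* Illustration only: the effective test after this patch. */
	static inline int is_zero_pfn_effective(unsigned long pfn)
	{
		extern unsigned long zero_pfn;      /* kernel empty zero page */
		extern unsigned long uksm_zero_pfn; /* UKSM's unswappable zero page */

		return pfn == zero_pfn || pfn == uksm_zero_pfn;
	}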
+diff --git a/include/linux/sradix-tree.h b/include/linux/sradix-tree.h
+new file mode 100644
+index 000000000000..d71edba6b63f
+--- /dev/null
++++ b/include/linux/sradix-tree.h
+@@ -0,0 +1,77 @@
++#ifndef _LINUX_SRADIX_TREE_H
++#define _LINUX_SRADIX_TREE_H
++
++
++#define INIT_SRADIX_TREE(root, mask) \
++do { \
++ (root)->height = 0; \
++ (root)->gfp_mask = (mask); \
++ (root)->rnode = NULL; \
++} while (0)
++
++#define ULONG_BITS (sizeof(unsigned long) * 8)
++#define SRADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
++//#define SRADIX_TREE_MAP_SHIFT 6
++//#define SRADIX_TREE_MAP_SIZE (1UL << SRADIX_TREE_MAP_SHIFT)
++//#define SRADIX_TREE_MAP_MASK (SRADIX_TREE_MAP_SIZE-1)
++
++struct sradix_tree_node {
++ unsigned int height; /* Height from the bottom */
++ unsigned int count;
++ unsigned int fulls; /* Number of full sublevel trees */
++ struct sradix_tree_node *parent;
++ void *stores[0];
++};
++
++/* A simple radix tree implementation */
++struct sradix_tree_root {
++ unsigned int height;
++ struct sradix_tree_node *rnode;
++
++	/* Last node found to have available empty stores in its sublevels */
++ struct sradix_tree_node *enter_node;
++ unsigned int shift;
++ unsigned int stores_size;
++ unsigned int mask;
++ unsigned long min; /* The first hole index */
++ unsigned long num;
++ //unsigned long *height_to_maxindex;
++
++ /* How the node is allocated and freed. */
++ struct sradix_tree_node *(*alloc)(void);
++ void (*free)(struct sradix_tree_node *node);
++
++	/* Called when a new node is added or removed */
++ void (*extend)(struct sradix_tree_node *parent, struct sradix_tree_node *child);
++ void (*assign)(struct sradix_tree_node *node, unsigned int index, void *item);
++ void (*rm)(struct sradix_tree_node *node, unsigned int offset);
++};
++
++struct sradix_tree_path {
++ struct sradix_tree_node *node;
++ int offset;
++};
++
++static inline
++void init_sradix_tree_root(struct sradix_tree_root *root, unsigned long shift)
++{
++ root->height = 0;
++ root->rnode = NULL;
++ root->shift = shift;
++ root->stores_size = 1UL << shift;
++ root->mask = root->stores_size - 1;
++}
++
++
++extern void *sradix_tree_next(struct sradix_tree_root *root,
++ struct sradix_tree_node *node, unsigned long index,
++ int (*iter)(void *, unsigned long));
++
++extern int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num);
++
++extern void sradix_tree_delete_from_leaf(struct sradix_tree_root *root,
++ struct sradix_tree_node *node, unsigned long index);
++
++extern void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index);
++
++#endif /* _LINUX_SRADIX_TREE_H */
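
The header above declares a caller-managed radix tree: the root carries mandatory alloc/free hooks plus optional extend/assign/rm callbacks, and each node embeds its store array. A hedged usage sketch follows; demo_node, demo_alloc, demo_free and demo are hypothetical names, while the calls are the ones declared above. mm/uksm.c further below wires up its slot tree in the same way.

	#include <linux/errno.h>
	#include <linux/slab.h>
	#include <linux/sradix-tree.h>

	#define DEMO_SHIFT 6	/* 64 stores per node */

	/* The flexible stores[] of the embedded sradix_tree_node must be
	 * backed by storage placed directly after it, as mm/uksm.c does
	 * for its slot_tree_node.
	 */
	struct demo_node {
		struct sradix_tree_node snode;
		void *stores[1UL << DEMO_SHIFT];
	};

	static struct sradix_tree_node *demo_alloc(void)
	{
		struct demo_node *p = kzalloc(sizeof(*p), GFP_KERNEL);

		return p ? &p->snode : NULL;
	}

	static void demo_free(struct sradix_tree_node *node)
	{
		kfree(container_of(node, struct demo_node, snode));
	}

	static int demo(void)
	{
		/* Zero-init: init_sradix_tree_root() does not clear the
		 * hooks or the enter_node/min/num fields.
		 */
		struct sradix_tree_root root = {};
		void *item = (void *)0x1;
		int err;

		init_sradix_tree_root(&root, DEMO_SHIFT);
		root.alloc = demo_alloc;
		root.free = demo_free;
		/* extend/assign/rm are optional; they are NULL-checked */

		err = sradix_tree_enter(&root, &item, 1); /* first hole: index 0 */
		if (err)
			return err;
		return sradix_tree_lookup(&root, 0) == item ? 0 : -EINVAL;
	}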
+diff --git a/include/linux/uksm.h b/include/linux/uksm.h
+new file mode 100644
+index 000000000000..bb8651f534f2
+--- /dev/null
++++ b/include/linux/uksm.h
+@@ -0,0 +1,149 @@
++#ifndef __LINUX_UKSM_H
++#define __LINUX_UKSM_H
++/*
++ * Memory merging support.
++ *
++ * This code enables dynamic sharing of identical pages found in different
++ * memory areas, even if they are not shared by fork().
++ */
++
++/* If !CONFIG_UKSM, only the no-op stubs below are compiled. */
++#ifdef CONFIG_UKSM
++
++#include <linux/bitops.h>
++#include <linux/mm.h>
++#include <linux/pagemap.h>
++#include <linux/rmap.h>
++#include <linux/sched.h>
++
++extern unsigned long zero_pfn __read_mostly;
++extern unsigned long uksm_zero_pfn __read_mostly;
++extern struct page *empty_uksm_zero_page;
++
++/* must be done before linked to mm */
++extern void uksm_vma_add_new(struct vm_area_struct *vma);
++extern void uksm_remove_vma(struct vm_area_struct *vma);
++
++#define UKSM_SLOT_NEED_SORT (1 << 0)
++#define UKSM_SLOT_NEED_RERAND (1 << 1)
++#define UKSM_SLOT_SCANNED (1 << 2) /* It's scanned in this round */
++#define UKSM_SLOT_FUL_SCANNED (1 << 3)
++#define UKSM_SLOT_IN_UKSM (1 << 4)
++
++struct vma_slot {
++ struct sradix_tree_node *snode;
++ unsigned long sindex;
++
++ struct list_head slot_list;
++ unsigned long fully_scanned_round;
++ unsigned long dedup_num;
++ unsigned long pages_scanned;
++ unsigned long this_sampled;
++ unsigned long last_scanned;
++ unsigned long pages_to_scan;
++ struct scan_rung *rung;
++ struct page **rmap_list_pool;
++ unsigned int *pool_counts;
++ unsigned long pool_size;
++ struct vm_area_struct *vma;
++ struct mm_struct *mm;
++ unsigned long ctime_j;
++ unsigned long pages;
++ unsigned long flags;
++ unsigned long pages_cowed; /* pages cowed this round */
++ unsigned long pages_merged; /* pages merged this round */
++ unsigned long pages_bemerged;
++
++	/* when it has had pages merged in this eval round */
++ struct list_head dedup_list;
++};
++
++static inline void uksm_unmap_zero_page(pte_t pte)
++{
++ if (pte_pfn(pte) == uksm_zero_pfn)
++ __dec_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES);
++}
++
++static inline void uksm_map_zero_page(pte_t pte)
++{
++ if (pte_pfn(pte) == uksm_zero_pfn)
++ __inc_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES);
++}
++
++static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page)
++{
++ if (vma->uksm_vma_slot && PageKsm(page))
++ vma->uksm_vma_slot->pages_cowed++;
++}
++
++static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte)
++{
++ if (vma->uksm_vma_slot && pte_pfn(pte) == uksm_zero_pfn)
++ vma->uksm_vma_slot->pages_cowed++;
++}
++
++static inline int uksm_flags_can_scan(unsigned long vm_flags)
++{
++#ifdef VM_SAO
++ if (vm_flags & VM_SAO)
++ return 0;
++#endif
++
++ return !(vm_flags & (VM_PFNMAP | VM_IO | VM_DONTEXPAND |
++ VM_HUGETLB | VM_MIXEDMAP | VM_SHARED
++ | VM_MAYSHARE | VM_GROWSUP | VM_GROWSDOWN));
++}
++
++static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p)
++{
++ if (uksm_flags_can_scan(*vm_flags_p))
++ *vm_flags_p |= VM_MERGEABLE;
++}
++
++/*
++ * Just a BUG_ON wrapper for places where the uksm zero page must not appear.
++ * TODO: it will be removed when the uksm zero page patch is stable enough.
++ */
++static inline void uksm_bugon_zeropage(pte_t pte)
++{
++ BUG_ON(pte_pfn(pte) == uksm_zero_pfn);
++}
++#else
++static inline void uksm_vma_add_new(struct vm_area_struct *vma)
++{
++}
++
++static inline void uksm_remove_vma(struct vm_area_struct *vma)
++{
++}
++
++static inline void uksm_unmap_zero_page(pte_t pte)
++{
++}
++
++static inline void uksm_map_zero_page(pte_t pte)
++{
++}
++
++static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page)
++{
++}
++
++static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte)
++{
++}
++
++static inline int uksm_flags_can_scan(unsigned long vm_flags)
++{
++ return 0;
++}
++
++static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p)
++{
++}
++
++static inline void uksm_bugon_zeropage(pte_t pte)
++{
++}
++#endif /* !CONFIG_UKSM */
++#endif /* __LINUX_UKSM_H */
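
Note how, with the stubs above, everything degrades to a no-op when CONFIG_UKSM is off. When it is on, uksm_vm_flags_mod() is what makes scanning opt-out rather than opt-in: any VMA whose flags pass uksm_flags_can_scan() gets VM_MERGEABLE at creation time. A short sketch of the effect on two common flag combinations (the demo function is hypothetical; the flag checks are the ones defined above):

	#include <linux/mm.h>
	#include <linux/uksm.h>

	/* Illustration only: which VMAs become VM_MERGEABLE automatically. */
	static void uksm_flags_demo(void)
	{
		unsigned long anon  = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
		unsigned long stack = anon | VM_GROWSDOWN;

		uksm_vm_flags_mod(&anon);  /* passes the scan check: VM_MERGEABLE added */
		uksm_vm_flags_mod(&stack); /* VM_GROWSDOWN is excluded: unchanged */
	}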
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 6d266388d380..bb52f8731dd6 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -587,7 +587,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+ __vma_link_rb(mm, tmp, rb_link, rb_parent);
+ rb_link = &tmp->vm_rb.rb_right;
+ rb_parent = &tmp->vm_rb;
+-
++ uksm_vma_add_new(tmp);
+ mm->map_count++;
+ if (!(tmp->vm_flags & VM_WIPEONFORK))
+ retval = copy_page_range(tmp, mpnt);
+diff --git a/lib/Makefile b/lib/Makefile
+index d415fc7067c5..e4045ebec8cd 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -31,7 +31,7 @@ endif
+ KCSAN_SANITIZE_random32.o := n
+
+ lib-y := ctype.o string.o vsprintf.o cmdline.o \
+- rbtree.o radix-tree.o timerqueue.o xarray.o \
++ rbtree.o radix-tree.o sradix-tree.o timerqueue.o xarray.o \
+ idr.o extable.o sha1.o irq_regs.o argv_split.o \
+ flex_proportions.o ratelimit.o show_mem.o \
+ is_single_threaded.o plist.o decompress.o kobject_uevent.o \
+diff --git a/lib/sradix-tree.c b/lib/sradix-tree.c
+new file mode 100644
+index 000000000000..ab21e6309b93
+--- /dev/null
++++ b/lib/sradix-tree.c
+@@ -0,0 +1,476 @@
++#include <linux/errno.h>
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <linux/spinlock.h>
++#include <linux/slab.h>
++#include <linux/gcd.h>
++#include <linux/sradix-tree.h>
++
++static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node)
++{
++ return node->fulls == root->stores_size ||
++ (node->height == 1 && node->count == root->stores_size);
++}
++
++/*
++ * Extend a sradix tree so it can store key @index.
++ */
++static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index)
++{
++ struct sradix_tree_node *node;
++ unsigned int height;
++
++ if (unlikely(root->rnode == NULL)) {
++ if (!(node = root->alloc()))
++ return -ENOMEM;
++
++ node->height = 1;
++ root->rnode = node;
++ root->height = 1;
++ }
++
++ /* Figure out what the height should be. */
++ height = root->height;
++ index >>= root->shift * height;
++
++ while (index) {
++ index >>= root->shift;
++ height++;
++ }
++
++ while (height > root->height) {
++ unsigned int newheight;
++
++ if (!(node = root->alloc()))
++ return -ENOMEM;
++
++ /* Increase the height. */
++ node->stores[0] = root->rnode;
++ root->rnode->parent = node;
++ if (root->extend)
++ root->extend(node, root->rnode);
++
++ newheight = root->height + 1;
++ node->height = newheight;
++ node->count = 1;
++ if (sradix_node_full(root, root->rnode))
++ node->fulls = 1;
++
++ root->rnode = node;
++ root->height = newheight;
++ }
++
++ return 0;
++}
++
++/*
++ * Search for the next item after the current node that is not NULL
++ * and satisfies the iter() callback.
++ */
++void *sradix_tree_next(struct sradix_tree_root *root,
++ struct sradix_tree_node *node, unsigned long index,
++ int (*iter)(void *item, unsigned long height))
++{
++ unsigned long offset;
++ void *item;
++
++ if (unlikely(node == NULL)) {
++ node = root->rnode;
++ for (offset = 0; offset < root->stores_size; offset++) {
++ item = node->stores[offset];
++ if (item && (!iter || iter(item, node->height)))
++ break;
++ }
++
++ if (unlikely(offset >= root->stores_size))
++ return NULL;
++
++ if (node->height == 1)
++ return item;
++ else
++ goto go_down;
++ }
++
++ while (node) {
++ offset = (index & root->mask) + 1;
++ for (; offset < root->stores_size; offset++) {
++ item = node->stores[offset];
++ if (item && (!iter || iter(item, node->height)))
++ break;
++ }
++
++ if (offset < root->stores_size)
++ break;
++
++ node = node->parent;
++ index >>= root->shift;
++ }
++
++ if (!node)
++ return NULL;
++
++ while (node->height > 1) {
++go_down:
++ node = item;
++ for (offset = 0; offset < root->stores_size; offset++) {
++ item = node->stores[offset];
++ if (item && (!iter || iter(item, node->height)))
++ break;
++ }
++
++ if (unlikely(offset >= root->stores_size))
++ return NULL;
++ }
++
++ BUG_ON(offset > root->stores_size);
++
++ return item;
++}
++
++/*
++ * Blindly insert the item into the tree. Typically, we reuse the
++ * first empty store item.
++ */
++int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num)
++{
++ unsigned long index;
++ unsigned int height;
++ struct sradix_tree_node *node, *tmp = NULL;
++ int offset, offset_saved;
++ void **store = NULL;
++ int error, i, j, shift;
++
++go_on:
++ index = root->min;
++
++ if (root->enter_node && !sradix_node_full(root, root->enter_node)) {
++ node = root->enter_node;
++ BUG_ON((index >> (root->shift * root->height)));
++ } else {
++ node = root->rnode;
++ if (node == NULL || (index >> (root->shift * root->height))
++ || sradix_node_full(root, node)) {
++ error = sradix_tree_extend(root, index);
++ if (error)
++ return error;
++
++ node = root->rnode;
++ }
++ }
++
++
++ height = node->height;
++ shift = (height - 1) * root->shift;
++ offset = (index >> shift) & root->mask;
++ while (shift > 0) {
++ offset_saved = offset;
++ for (; offset < root->stores_size; offset++) {
++ store = &node->stores[offset];
++ tmp = *store;
++
++ if (!tmp || !sradix_node_full(root, tmp))
++ break;
++ }
++ BUG_ON(offset >= root->stores_size);
++
++ if (offset != offset_saved) {
++ index += (offset - offset_saved) << shift;
++ index &= ~((1UL << shift) - 1);
++ }
++
++ if (!tmp) {
++ if (!(tmp = root->alloc()))
++ return -ENOMEM;
++
++ tmp->height = shift / root->shift;
++ *store = tmp;
++ tmp->parent = node;
++ node->count++;
++// if (root->extend)
++// root->extend(node, tmp);
++ }
++
++ node = tmp;
++ shift -= root->shift;
++ offset = (index >> shift) & root->mask;
++ }
++
++ BUG_ON(node->height != 1);
++
++
++ store = &node->stores[offset];
++ for (i = 0, j = 0;
++ j < root->stores_size - node->count &&
++ i < root->stores_size - offset && j < num; i++) {
++ if (!store[i]) {
++ store[i] = item[j];
++ if (root->assign)
++ root->assign(node, index + i, item[j]);
++ j++;
++ }
++ }
++
++ node->count += j;
++ root->num += j;
++ num -= j;
++
++ while (sradix_node_full(root, node)) {
++ node = node->parent;
++ if (!node)
++ break;
++
++ node->fulls++;
++ }
++
++ if (unlikely(!node)) {
++ /* All nodes are full */
++ root->min = 1 << (root->height * root->shift);
++ root->enter_node = NULL;
++ } else {
++ root->min = index + i - 1;
++ root->min |= (1UL << (node->height - 1)) - 1;
++ root->min++;
++ root->enter_node = node;
++ }
++
++ if (num) {
++ item += j;
++ goto go_on;
++ }
++
++ return 0;
++}
++
++
++/**
++ * sradix_tree_shrink - shrink the height of a sradix tree to the minimum
++ * @root: sradix tree root
++ *
++ */
++static inline void sradix_tree_shrink(struct sradix_tree_root *root)
++{
++ /* try to shrink tree height */
++ while (root->height > 1) {
++ struct sradix_tree_node *to_free = root->rnode;
++
++ /*
++		 * If the candidate node has more than one child, or its only
++		 * child is not at the leftmost store, we cannot shrink.
++ */
++ if (to_free->count != 1 || !to_free->stores[0])
++ break;
++
++ root->rnode = to_free->stores[0];
++ root->rnode->parent = NULL;
++ root->height--;
++ if (unlikely(root->enter_node == to_free))
++ root->enter_node = NULL;
++ root->free(to_free);
++ }
++}
++
++/*
++ * Delete the item at the known leaf node and index
++ */
++void sradix_tree_delete_from_leaf(struct sradix_tree_root *root,
++ struct sradix_tree_node *node, unsigned long index)
++{
++ unsigned int offset;
++ struct sradix_tree_node *start, *end;
++
++ BUG_ON(node->height != 1);
++
++ start = node;
++ while (node && !(--node->count))
++ node = node->parent;
++
++ end = node;
++ if (!node) {
++ root->rnode = NULL;
++ root->height = 0;
++ root->min = 0;
++ root->num = 0;
++ root->enter_node = NULL;
++ } else {
++ offset = (index >> (root->shift * (node->height - 1))) & root->mask;
++ if (root->rm)
++ root->rm(node, offset);
++ node->stores[offset] = NULL;
++ root->num--;
++ if (root->min > index) {
++ root->min = index;
++ root->enter_node = node;
++ }
++ }
++
++ if (start != end) {
++ do {
++ node = start;
++ start = start->parent;
++ if (unlikely(root->enter_node == node))
++ root->enter_node = end;
++ root->free(node);
++ } while (start != end);
++
++ /*
++		 * Note that shrink may free "end", so enter_node still needs to
++ * be checked inside.
++ */
++ sradix_tree_shrink(root);
++ } else if (node->count == root->stores_size - 1) {
++ /* It WAS a full leaf node. Update the ancestors */
++ node = node->parent;
++ while (node) {
++ node->fulls--;
++ if (node->fulls != root->stores_size - 1)
++ break;
++
++ node = node->parent;
++ }
++ }
++}
++
++void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index)
++{
++ unsigned int height, offset;
++ struct sradix_tree_node *node;
++ int shift;
++
++ node = root->rnode;
++ if (node == NULL || (index >> (root->shift * root->height)))
++ return NULL;
++
++ height = root->height;
++ shift = (height - 1) * root->shift;
++
++ do {
++ offset = (index >> shift) & root->mask;
++ node = node->stores[offset];
++ if (!node)
++ return NULL;
++
++ shift -= root->shift;
++ } while (shift >= 0);
++
++ return node;
++}
++
++/*
++ * Return the item if it exists, otherwise create it in place
++ * and return the created item.
++ */
++void *sradix_tree_lookup_create(struct sradix_tree_root *root,
++ unsigned long index, void *(*item_alloc)(void))
++{
++ unsigned int height, offset;
++ struct sradix_tree_node *node, *tmp;
++ void *item;
++ int shift, error;
++
++ if (root->rnode == NULL || (index >> (root->shift * root->height))) {
++ if (item_alloc) {
++ error = sradix_tree_extend(root, index);
++ if (error)
++ return NULL;
++ } else {
++ return NULL;
++ }
++ }
++
++ node = root->rnode;
++ height = root->height;
++ shift = (height - 1) * root->shift;
++
++ do {
++ offset = (index >> shift) & root->mask;
++ if (!node->stores[offset]) {
++ if (!(tmp = root->alloc()))
++ return NULL;
++
++ tmp->height = shift / root->shift;
++ node->stores[offset] = tmp;
++ tmp->parent = node;
++ node->count++;
++ node = tmp;
++ } else {
++ node = node->stores[offset];
++ }
++
++ shift -= root->shift;
++ } while (shift > 0);
++
++ BUG_ON(node->height != 1);
++ offset = index & root->mask;
++ if (node->stores[offset]) {
++ return node->stores[offset];
++ } else if (item_alloc) {
++ if (!(item = item_alloc()))
++ return NULL;
++
++ node->stores[offset] = item;
++
++ /*
++		 * NOTE: we do NOT call root->assign here, since this item was
++		 * newly created by us and carries no meaning yet. The caller
++		 * can invoke it if necessary.
++ */
++
++ node->count++;
++ root->num++;
++
++ while (sradix_node_full(root, node)) {
++ node = node->parent;
++ if (!node)
++ break;
++
++ node->fulls++;
++ }
++
++ if (unlikely(!node)) {
++ /* All nodes are full */
++ root->min = 1 << (root->height * root->shift);
++ } else {
++ if (root->min == index) {
++ root->min |= (1UL << (node->height - 1)) - 1;
++ root->min++;
++ root->enter_node = node;
++ }
++ }
++
++ return item;
++ } else {
++ return NULL;
++ }
++
++}
++
++int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index)
++{
++ unsigned int height, offset;
++ struct sradix_tree_node *node;
++ int shift;
++
++ node = root->rnode;
++ if (node == NULL || (index >> (root->shift * root->height)))
++ return -ENOENT;
++
++ height = root->height;
++ shift = (height - 1) * root->shift;
++
++ do {
++ offset = (index >> shift) & root->mask;
++ node = node->stores[offset];
++ if (!node)
++ return -ENOENT;
++
++ shift -= root->shift;
++ } while (shift > 0);
++
++ offset = index & root->mask;
++ if (!node->stores[offset])
++ return -ENOENT;
++
++ sradix_tree_delete_from_leaf(root, node, index);
++
++ return 0;
++}
+diff --git a/mm/Kconfig b/mm/Kconfig
+index 390165ffbb0f..50d02cfa0b68 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -317,6 +317,32 @@ config KSM
+ See Documentation/vm/ksm.rst for more information: KSM is inactive
+ until a program has madvised that an area is MADV_MERGEABLE, and
+ root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
++choice
++ prompt "Choose UKSM/KSM strategy"
++ default UKSM
++ depends on KSM
++ help
++	  This option allows you to select a UKSM/KSM strategy.
++
++config UKSM
++ bool "Ultra-KSM for page merging"
++ depends on KSM
++ help
++	  UKSM is inspired by the Linux kernel project KSM (Kernel Samepage
++	  Merging), but with a fundamentally rewritten core algorithm. With
++	  an advanced algorithm, UKSM can transparently scan all anonymously
++	  mapped user space applications with significantly improved scan
++	  speed and CPU efficiency. Since KVM is friendly to KSM, KVM can
++	  also benefit from UKSM. UKSM has had its first stable release and
++	  its first real-world enterprise user. For more information, see
++	  the project page at github.com/dolohow/uksm.
++
++config KSM_LEGACY
++ bool "Legacy KSM implementation"
++ depends on KSM
++ help
++ The legacy KSM implementation from Red Hat.
++endchoice
+
+ config DEFAULT_MMAP_MIN_ADDR
+ int "Low address space to protect from user allocation"
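
Since UKSM and KSM_LEGACY form a choice underneath KSM, exactly one implementation backs CONFIG_KSM. A config fragment enabling UKSM would presumably look like this (a hedged reconstruction, not copied from any file in this commit):

	CONFIG_KSM=y
	CONFIG_UKSM=y
	# CONFIG_KSM_LEGACY is not set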
+diff --git a/mm/Makefile b/mm/Makefile
+index d73aed0fc99c..d6612b76c5da 100644
+--- a/mm/Makefile
++++ b/mm/Makefile
+@@ -76,7 +76,8 @@ obj-$(CONFIG_SPARSEMEM) += sparse.o
+ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
+ obj-$(CONFIG_SLOB) += slob.o
+ obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
+-obj-$(CONFIG_KSM) += ksm.o
++obj-$(CONFIG_KSM_LEGACY) += ksm.o
++obj-$(CONFIG_UKSM) += uksm.o
+ obj-$(CONFIG_PAGE_POISONING) += page_poison.o
+ obj-$(CONFIG_SLAB) += slab.o
+ obj-$(CONFIG_SLUB) += slub.o
+diff --git a/mm/ksm.c b/mm/ksm.c
+index 0960750bb316..ae17158cb67a 100644
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -858,17 +858,6 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
+ return err;
+ }
+
+-static inline struct stable_node *page_stable_node(struct page *page)
+-{
+- return PageKsm(page) ? page_rmapping(page) : NULL;
+-}
+-
+-static inline void set_page_stable_node(struct page *page,
+- struct stable_node *stable_node)
+-{
+- page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
+-}
+-
+ #ifdef CONFIG_SYSFS
+ /*
+ * Only called through the sysfs control interface:
+diff --git a/mm/memory.c b/mm/memory.c
+index c48f8df6e502..db47ee177008 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -146,6 +146,25 @@ EXPORT_SYMBOL(zero_pfn);
+
+ unsigned long highest_memmap_pfn __read_mostly;
+
++#ifdef CONFIG_UKSM
++unsigned long uksm_zero_pfn __read_mostly;
++EXPORT_SYMBOL_GPL(uksm_zero_pfn);
++struct page *empty_uksm_zero_page;
++
++static int __init setup_uksm_zero_page(void)
++{
++ empty_uksm_zero_page = alloc_pages(__GFP_ZERO & ~__GFP_MOVABLE, 0);
++ if (!empty_uksm_zero_page)
++ panic("Oh boy, that early out of memory?");
++
++ SetPageReserved(empty_uksm_zero_page);
++ uksm_zero_pfn = page_to_pfn(empty_uksm_zero_page);
++
++ return 0;
++}
++core_initcall(setup_uksm_zero_page);
++#endif
++
+ /*
+ * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
+ */
+@@ -161,6 +180,7 @@ void mm_trace_rss_stat(struct mm_struct *mm, int member, long count)
+ trace_rss_stat(mm, member, count);
+ }
+
++
+ #if defined(SPLIT_RSS_COUNTING)
+
+ void sync_mm_rss(struct mm_struct *mm)
+@@ -869,6 +889,11 @@ copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
+ get_page(page);
+ page_dup_rmap(page, false);
+ rss[mm_counter(page)]++;
++
++ /* Should return NULL in vm_normal_page() */
++ uksm_bugon_zeropage(pte);
++ } else {
++ uksm_map_zero_page(pte);
+ }
+
+ /*
+@@ -1237,8 +1262,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
+ ptent = ptep_get_and_clear_full(mm, addr, pte,
+ tlb->fullmm);
+ tlb_remove_tlb_entry(tlb, pte, addr);
+- if (unlikely(!page))
++ if (unlikely(!page)) {
++ uksm_unmap_zero_page(ptent);
+ continue;
++ }
+
+ if (!PageAnon(page)) {
+ if (pte_dirty(ptent)) {
+@@ -2586,6 +2613,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
+
+ if (likely(src)) {
+ copy_user_highpage(dst, src, addr, vma);
++ uksm_cow_page(vma, src);
+ return true;
+ }
+
+@@ -2832,6 +2860,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
+ vmf->address);
+ if (!new_page)
+ goto oom;
++ uksm_cow_pte(vma, vmf->orig_pte);
+ } else {
+ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
+ vmf->address);
+@@ -2874,7 +2903,9 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
+ mm_counter_file(old_page));
+ inc_mm_counter_fast(mm, MM_ANONPAGES);
+ }
++ uksm_bugon_zeropage(vmf->orig_pte);
+ } else {
++ uksm_unmap_zero_page(vmf->orig_pte);
+ inc_mm_counter_fast(mm, MM_ANONPAGES);
+ }
+ flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 5c8b4485860d..b8dd56dd900d 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -46,6 +46,7 @@
+ #include <linux/moduleparam.h>
+ #include <linux/pkeys.h>
+ #include <linux/oom.h>
++#include <linux/ksm.h>
+ #include <linux/sched/mm.h>
+
+ #include <linux/uaccess.h>
+@@ -181,6 +182,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+ if (vma->vm_file)
+ fput(vma->vm_file);
+ mpol_put(vma_policy(vma));
++ uksm_remove_vma(vma);
+ vm_area_free(vma);
+ return next;
+ }
+@@ -750,9 +752,16 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ long adjust_next = 0;
+ int remove_next = 0;
+
++/*
++ * To avoid deadlock, uksm_remove_vma must be done before any spin_lock is
++ * acquired
++ */
++ uksm_remove_vma(vma);
++
+ if (next && !insert) {
+ struct vm_area_struct *exporter = NULL, *importer = NULL;
+
++ uksm_remove_vma(next);
+ if (end >= next->vm_end) {
+ /*
+ * vma expands, overlapping all the next, and
+@@ -883,6 +892,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ end_changed = true;
+ }
+ vma->vm_pgoff = pgoff;
++
+ if (adjust_next) {
+ next->vm_start += adjust_next;
+ next->vm_pgoff += adjust_next >> PAGE_SHIFT;
+@@ -987,6 +997,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ if (remove_next == 2) {
+ remove_next = 1;
+ end = next->vm_end;
++ uksm_remove_vma(next);
+ goto again;
+ }
+ else if (next)
+@@ -1013,10 +1024,14 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ */
+ VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
+ }
++ } else {
++ if (next && !insert)
++ uksm_vma_add_new(next);
+ }
+ if (insert && file)
+ uprobe_mmap(insert);
+
++ uksm_vma_add_new(vma);
+ validate_mm(mm);
+
+ return 0;
+@@ -1472,6 +1487,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
+ vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+ mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
++ /* If uksm is enabled, we add VM_MERGEABLE to new VMAs. */
++ uksm_vm_flags_mod(&vm_flags);
++
+ if (flags & MAP_LOCKED)
+ if (!can_do_mlock())
+ return -EPERM;
+@@ -1867,6 +1885,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
+ allow_write_access(file);
+ }
+ file = vma->vm_file;
++ uksm_vma_add_new(vma);
+ out:
+ perf_event_mmap(vma);
+
+@@ -1909,6 +1928,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
+ if (vm_flags & VM_DENYWRITE)
+ allow_write_access(file);
+ free_vma:
++ uksm_remove_vma(vma);
+ vm_area_free(vma);
+ unacct_error:
+ if (charged)
+@@ -2768,6 +2788,8 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+ else
+ err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+
++ uksm_vma_add_new(new);
++
+ /* Success. */
+ if (!err)
+ return 0;
+@@ -3075,6 +3097,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
+ if ((flags & (~VM_EXEC)) != 0)
+ return -EINVAL;
+ flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
++ uksm_vm_flags_mod(&flags);
+
+ mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+ if (IS_ERR_VALUE(mapped_addr))
+@@ -3120,6 +3143,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
+ vma->vm_flags = flags;
+ vma->vm_page_prot = vm_get_page_prot(flags);
+ vma_link(mm, vma, prev, rb_link, rb_parent);
++ uksm_vma_add_new(vma);
+ out:
+ perf_event_mmap(vma);
+ mm->total_vm += len >> PAGE_SHIFT;
+@@ -3197,6 +3221,12 @@ void exit_mmap(struct mm_struct *mm)
+ mmap_write_unlock(mm);
+ }
+
++ /*
++	 * Taking the mmap write lock does not harm others,
++ * but it's crucial for uksm to avoid races.
++ */
++ mmap_write_lock(mm);
++
+ if (mm->locked_vm) {
+ vma = mm->mmap;
+ while (vma) {
+@@ -3232,6 +3262,11 @@ void exit_mmap(struct mm_struct *mm)
+ cond_resched();
+ }
+ vm_unacct_memory(nr_accounted);
++
++ mm->mmap = NULL;
++ mm->mm_rb = RB_ROOT;
++ vmacache_invalidate(mm);
++ mmap_write_unlock(mm);
+ }
+
+ /* Insert vm structure into process list sorted by address
+@@ -3339,6 +3374,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
+ new_vma->vm_ops->open(new_vma);
+ vma_link(mm, new_vma, prev, rb_link, rb_parent);
+ *need_rmap_locks = false;
++ uksm_vma_add_new(new_vma);
+ }
+ return new_vma;
+
+@@ -3491,6 +3527,7 @@ static struct vm_area_struct *__install_special_mapping(
+ vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
+
+ perf_event_mmap(vma);
++ uksm_vma_add_new(vma);
+
+ return vma;
+
+diff --git a/mm/uksm.c b/mm/uksm.c
+new file mode 100644
+index 000000000000..e4732c00be69
+--- /dev/null
++++ b/mm/uksm.c
+@@ -0,0 +1,5614 @@
++/*
++ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
++ *
++ * This is an improvement upon KSM. Some basic data structures and routines
++ * are borrowed from ksm.c .
++ *
++ * Its new features:
++ * 1. Full system scan:
++ * It automatically scans all user processes' anonymous VMAs. Kernel-user
++ * interaction to submit a memory area to KSM is no longer needed.
++ *
++ * 2. Rich area detection:
++ * It automatically detects rich areas containing abundant duplicated
++ * pages. Rich areas are given full scan speed. Poor areas are
++ * sampled at a reasonable speed with very low CPU consumption.
++ *
++ * 3. Ultra Per-page scan speed improvement:
++ * A new hash algorithm is proposed. As a result, on a machine with
++ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHz DDR2 main memory, it
++ * can scan memory areas that do not contain duplicated pages at a speed of
++ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at a speed of
++ * 477MB/sec ~ 923MB/sec.
++ *
++ * 4. Thrashing area avoidance:
++ * Thrashing areas (VMAs that have frequent KSM page break-outs) can be
++ * filtered out. My benchmark shows it's more efficient than KSM's per-page
++ * hash-value-based volatile page detection.
++ *
++ *
++ * 5. Misc changes upon KSM:
++ * * It has a fully x86-optimized memcmp dedicated to 4-byte-aligned page
++ * comparison. It's much faster than the default C version on x86.
++ * * rmap_item now has a struct page * member to loosely cache an
++ * address-->page mapping, which avoids many costly calls to
++ * follow_page().
++ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
++ * * try_to_merge_two_pages() can now revert a pte if it fails. No
++ * break_ksm is needed for this case.
++ *
++ * 6. Full zero page consideration (contributed by Figo Zhang)
++ * uksmd now considers full zero pages as special pages and merges them
++ * into a special unswappable uksm zero page.
++ */
++
++#include <linux/errno.h>
++#include <linux/mm.h>
++#include <linux/fs.h>
++#include <linux/mman.h>
++#include <linux/sched.h>
++#include <linux/sched/mm.h>
++#include <linux/sched/coredump.h>
++#include <linux/sched/cputime.h>
++#include <linux/rwsem.h>
++#include <linux/pagemap.h>
++#include <linux/rmap.h>
++#include <linux/spinlock.h>
++#include <linux/jhash.h>
++#include <linux/delay.h>
++#include <linux/kthread.h>
++#include <linux/wait.h>
++#include <linux/slab.h>
++#include <linux/rbtree.h>
++#include <linux/memory.h>
++#include <linux/mmu_notifier.h>
++#include <linux/swap.h>
++#include <linux/ksm.h>
++#include <linux/crypto.h>
++#include <linux/scatterlist.h>
++#include <crypto/hash.h>
++#include <linux/random.h>
++#include <linux/math64.h>
++#include <linux/gcd.h>
++#include <linux/freezer.h>
++#include <linux/oom.h>
++#include <linux/numa.h>
++#include <linux/sradix-tree.h>
++
++#include <asm/tlbflush.h>
++#include "internal.h"
++
++#ifdef CONFIG_X86
++#undef memcmp
++
++#ifdef CONFIG_X86_32
++#define memcmp memcmpx86_32
++/*
++ * Compare 4-byte-aligned addresses s1 and s2, of length n bytes
++ */
++int memcmpx86_32(void *s1, void *s2, size_t n)
++{
++ size_t num = n / 4;
++ register int res;
++
++ __asm__ __volatile__
++ (
++ "testl %3,%3\n\t"
++ "repe; cmpsd\n\t"
++ "je 1f\n\t"
++ "sbbl %0,%0\n\t"
++ "orl $1,%0\n"
++ "1:"
++ : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num)
++ : "0" (0)
++ : "cc");
++
++ return res;
++}
++
++/*
++ * Check whether the page is all zeroes
++ */
++static int is_full_zero(const void *s1, size_t len)
++{
++ unsigned char same;
++
++ len /= 4;
++
++ __asm__ __volatile__
++ ("repe; scasl;"
++ "sete %0"
++ : "=qm" (same), "+D" (s1), "+c" (len)
++ : "a" (0)
++ : "cc");
++
++ return same;
++}
++
++
++#elif defined(CONFIG_X86_64)
++#define memcmp memcmpx86_64
++/*
++ * Compare 8-byte-aligned addresses s1 and s2, of length n bytes
++ */
++int memcmpx86_64(void *s1, void *s2, size_t n)
++{
++ size_t num = n / 8;
++ register int res;
++
++ __asm__ __volatile__
++ (
++ "testq %q3,%q3\n\t"
++ "repe; cmpsq\n\t"
++ "je 1f\n\t"
++ "sbbq %q0,%q0\n\t"
++ "orq $1,%q0\n"
++ "1:"
++ : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num)
++ : "0" (0)
++ : "cc");
++
++ return res;
++}
++
++static int is_full_zero(const void *s1, size_t len)
++{
++ unsigned char same;
++
++ len /= 8;
++
++ __asm__ __volatile__
++ ("repe; scasq;"
++ "sete %0"
++ : "=qm" (same), "+D" (s1), "+c" (len)
++ : "a" (0)
++ : "cc");
++
++ return same;
++}
++
++#endif
++#else
++static int is_full_zero(const void *s1, size_t len)
++{
++ unsigned long *src = s1;
++ int i;
++
++ len /= sizeof(*src);
++
++ for (i = 0; i < len; i++) {
++ if (src[i])
++ return 0;
++ }
++
++ return 1;
++}
++#endif
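
These primitives only need to give a stable total ordering for the rbtree comparisons, not exact memcmp() byte order: the x86 versions compare whole machine words, so on the first differing word the sign follows the unsigned word comparison. A small user-space harness (hypothetical; assumes the x86-64 functions above are pasted into it) illustrates the expected semantics on page-sized buffers:

	#include <assert.h>

	/* memcmpx86_64() and is_full_zero() as defined above go here. */

	int main(void)
	{
		unsigned long a[512] = { 0 };	/* one 4 KiB page of zeroes */
		unsigned long b[512] = { 0 };

		assert(is_full_zero(a, sizeof(a)));
		b[100] = 1;
		assert(!is_full_zero(b, sizeof(b)));

		assert(memcmpx86_64(a, a, sizeof(a)) == 0); /* identical pages */
		assert(memcmpx86_64(a, b, sizeof(a)) < 0);  /* 0 < 1 at word 100 */
		assert(memcmpx86_64(b, a, sizeof(b)) > 0);
		return 0;
	}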
++
++#define UKSM_RUNG_ROUND_FINISHED (1 << 0)
++#define TIME_RATIO_SCALE 10000
++
++#define SLOT_TREE_NODE_SHIFT 8
++#define SLOT_TREE_NODE_STORE_SIZE (1UL << SLOT_TREE_NODE_SHIFT)
++struct slot_tree_node {
++ unsigned long size;
++ struct sradix_tree_node snode;
++ void *stores[SLOT_TREE_NODE_STORE_SIZE];
++};
++
++static struct kmem_cache *slot_tree_node_cachep;
++
++static struct sradix_tree_node *slot_tree_node_alloc(void)
++{
++ struct slot_tree_node *p;
++
++ p = kmem_cache_zalloc(slot_tree_node_cachep, GFP_KERNEL |
++ __GFP_NORETRY | __GFP_NOWARN);
++ if (!p)
++ return NULL;
++
++ return &p->snode;
++}
++
++static void slot_tree_node_free(struct sradix_tree_node *node)
++{
++ struct slot_tree_node *p;
++
++ p = container_of(node, struct slot_tree_node, snode);
++ kmem_cache_free(slot_tree_node_cachep, p);
++}
++
++static void slot_tree_node_extend(struct sradix_tree_node *parent,
++ struct sradix_tree_node *child)
++{
++ struct slot_tree_node *p, *c;
++
++ p = container_of(parent, struct slot_tree_node, snode);
++ c = container_of(child, struct slot_tree_node, snode);
++
++ p->size += c->size;
++}
++
++void slot_tree_node_assign(struct sradix_tree_node *node,
++ unsigned int index, void *item)
++{
++ struct vma_slot *slot = item;
++ struct slot_tree_node *cur;
++
++ slot->snode = node;
++ slot->sindex = index;
++
++ while (node) {
++ cur = container_of(node, struct slot_tree_node, snode);
++ cur->size += slot->pages;
++ node = node->parent;
++ }
++}
++
++void slot_tree_node_rm(struct sradix_tree_node *node, unsigned int offset)
++{
++ struct vma_slot *slot;
++ struct slot_tree_node *cur;
++ unsigned long pages;
++
++ if (node->height == 1) {
++ slot = node->stores[offset];
++ pages = slot->pages;
++ } else {
++ cur = container_of(node->stores[offset],
++ struct slot_tree_node, snode);
++ pages = cur->size;
++ }
++
++ while (node) {
++ cur = container_of(node, struct slot_tree_node, snode);
++ cur->size -= pages;
++ node = node->parent;
++ }
++}
++
++unsigned long slot_iter_index;
++int slot_iter(void *item, unsigned long height)
++{
++ struct slot_tree_node *node;
++ struct vma_slot *slot;
++
++ if (height == 1) {
++ slot = item;
++ if (slot_iter_index < slot->pages) {
++			/* in this one */
++ return 1;
++ } else {
++ slot_iter_index -= slot->pages;
++ return 0;
++ }
++
++ } else {
++ node = container_of(item, struct slot_tree_node, snode);
++ if (slot_iter_index < node->size) {
++			/* in this one */
++ return 1;
++ } else {
++ slot_iter_index -= node->size;
++ return 0;
++ }
++ }
++}
++
++
++static inline void slot_tree_init_root(struct sradix_tree_root *root)
++{
++ init_sradix_tree_root(root, SLOT_TREE_NODE_SHIFT);
++ root->alloc = slot_tree_node_alloc;
++ root->free = slot_tree_node_free;
++ root->extend = slot_tree_node_extend;
++ root->assign = slot_tree_node_assign;
++ root->rm = slot_tree_node_rm;
++}
++
++void slot_tree_init(void)
++{
++ slot_tree_node_cachep = kmem_cache_create("slot_tree_node",
++ sizeof(struct slot_tree_node), 0,
++ SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
++ NULL);
++}
++
++
++/* Each rung of this ladder is a list of VMAs having the same scan ratio */
++struct scan_rung {
++ //struct list_head scanned_list;
++ struct sradix_tree_root vma_root;
++ struct sradix_tree_root vma_root2;
++
++ struct vma_slot *current_scan;
++ unsigned long current_offset;
++
++ /*
++	 * The initial value for current_offset; it should loop over
++	 * [0, step - 1] so that every slot gets a chance to be scanned.
++ */
++ unsigned long offset_init;
++ unsigned long step; /* dynamic step for current_offset */
++ unsigned int flags;
++ unsigned long pages_to_scan;
++ //unsigned long fully_scanned_slots;
++ /*
++	 * A little bit tricky - if cpu_ratio > 0, then the value is the
++	 * CPU time ratio this rung can spend in every scan
++	 * period. If < 0, then it is the CPU time ratio relative to the
++	 * max CPU percentage the user specified. Both are in units of
++	 * 1/TIME_RATIO_SCALE.
++ */
++ int cpu_ratio;
++
++ /*
++	 * How long will it take for all slots in this rung to be fully
++	 * scanned? If it's zero, we don't care about the cover time:
++	 * everything is treated as fully scanned.
++ */
++ unsigned int cover_msecs;
++ //unsigned long vma_num;
++ //unsigned long pages; /* Sum of all slot's pages in rung */
++};
++
++/**
++ * node of either the stable or unstable rbtree
++ *
++ */
++struct tree_node {
++ struct rb_node node; /* link in the main (un)stable rbtree */
++ struct rb_root sub_root; /* rb_root for sublevel collision rbtree */
++ u32 hash;
++	unsigned long count; /* TODO: merge with sub_root */
++ struct list_head all_list; /* all tree nodes in stable/unstable tree */
++};
++
++/**
++ * struct stable_node - node of the stable rbtree
++ * @node: rb node of this ksm page in the stable tree
++ * @hlist: hlist head of rmap_items using this ksm page
++ * @kpfn: page frame number of this ksm page
++ */
++struct stable_node {
++ struct rb_node node; /* link in sub-rbtree */
++	struct tree_node *tree_node; /* its tree_node root in the stable tree, NULL if it's in the hell list */
++ struct hlist_head hlist;
++ unsigned long kpfn;
++	u32 hash_max; /* if == 0, it has not been calculated yet */
++ struct list_head all_list; /* in a list for all stable nodes */
++};
++
++/**
++ * struct node_vma - groups rmap_items linked to the same stable
++ * node together.
++ */
++struct node_vma {
++ union {
++ struct vma_slot *slot;
++ unsigned long key; /* slot is used as key sorted on hlist */
++ };
++ struct hlist_node hlist;
++ struct hlist_head rmap_hlist;
++ struct stable_node *head;
++};
++
++/**
++ * struct rmap_item - reverse mapping item for virtual addresses
++ * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
++ * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
++ * @mm: the memory structure this rmap_item is pointing into
++ * @address: the virtual address this rmap_item tracks (+ flags in low bits)
++ * @node: rb node of this rmap_item in the unstable tree
++ * @head: pointer to stable_node heading this list in the stable tree
++ * @hlist: link into hlist of rmap_items hanging off that stable_node
++ */
++struct rmap_item {
++ struct vma_slot *slot;
++ struct page *page;
++ unsigned long address; /* + low bits used for flags below */
++ unsigned long hash_round;
++ unsigned long entry_index;
++ union {
++ struct {/* when in unstable tree */
++ struct rb_node node;
++ struct tree_node *tree_node;
++ u32 hash_max;
++ };
++ struct { /* when in stable tree */
++ struct node_vma *head;
++ struct hlist_node hlist;
++ struct anon_vma *anon_vma;
++ };
++ };
++} __aligned(4);
++
++struct rmap_list_entry {
++ union {
++ struct rmap_item *item;
++ unsigned long addr;
++ };
++ /* lowest bit is used for is_addr tag */
++} __aligned(4); /* 4-byte aligned to fit into pages */
++
++
++/* Basic data structure definition ends */
++
++
++/*
++ * Flags for rmap_item to judge if it's listed in the stable/unstable tree.
++ * The flags use the low bits of rmap_item.address
++ */
++#define UNSTABLE_FLAG 0x1
++#define STABLE_FLAG 0x2
++#define get_rmap_addr(x) ((x)->address & PAGE_MASK)
++
++/*
++ * rmap_list_entry helpers
++ */
++#define IS_ADDR_FLAG 1
++#define is_addr(ptr) ((unsigned long)(ptr) & IS_ADDR_FLAG)
++#define set_is_addr(ptr) ((ptr) |= IS_ADDR_FLAG)
++#define get_clean_addr(ptr) (((ptr) & ~(__typeof__(ptr))IS_ADDR_FLAG))
++
++
++/*
++ * High speed caches for frequently allocated and freed structs
++ */
++static struct kmem_cache *rmap_item_cache;
++static struct kmem_cache *stable_node_cache;
++static struct kmem_cache *node_vma_cache;
++static struct kmem_cache *vma_slot_cache;
++static struct kmem_cache *tree_node_cache;
++#define UKSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("uksm_"#__struct,\
++ sizeof(struct __struct), __alignof__(struct __struct),\
++ (__flags), NULL)
++
++/* Array of all scan_rung, uksm_scan_ladder[0] having the minimum scan ratio */
++#define SCAN_LADDER_SIZE 4
++static struct scan_rung uksm_scan_ladder[SCAN_LADDER_SIZE];
++
++/* The evaluation rounds uksmd has finished */
++static unsigned long long uksm_eval_round = 1;
++
++/*
++ * we add 1 to this var when we consider we should rebuild the whole
++ * unstable tree.
++ */
++static unsigned long uksm_hash_round = 1;
++
++/*
++ * How many times the whole memory is scanned.
++ */
++static unsigned long long fully_scanned_round = 1;
++
++/* The total number of virtual pages of all vma slots */
++static u64 uksm_pages_total;
++
++/* The number of pages scanned since startup */
++static u64 uksm_pages_scanned;
++
++static u64 scanned_virtual_pages;
++
++/* The number of pages scanned since the last encode_benefit call */
++static u64 uksm_pages_scanned_last;
++
++/* If the scanned number gets too large, we encode it here */
++static u64 pages_scanned_stored;
++
++static unsigned long pages_scanned_base;
++
++/* The number of nodes in the stable tree */
++static unsigned long uksm_pages_shared;
++
++/* The number of page slots additionally sharing those nodes */
++static unsigned long uksm_pages_sharing;
++
++/* The number of nodes in the unstable tree */
++static unsigned long uksm_pages_unshared;
++
++/*
++ * Milliseconds ksmd should sleep between scans,
++ * >= 100ms to be consistent with
++ * scan_time_to_sleep_msec()
++ */
++static unsigned int uksm_sleep_jiffies;
++
++/* The real value for the uksmd next sleep */
++static unsigned int uksm_sleep_real;
++
++/* Saved value for user input uksm_sleep_jiffies when it's enlarged */
++static unsigned int uksm_sleep_saved;
++
++/* Max percentage of cpu utilization ksmd can take to scan in one batch */
++static unsigned int uksm_max_cpu_percentage;
++
++static int uksm_cpu_governor;
++
++static char *uksm_cpu_governor_str[4] = { "full", "medium", "low", "quiet" };
++
++struct uksm_cpu_preset_s {
++ int cpu_ratio[SCAN_LADDER_SIZE];
++ unsigned int cover_msecs[SCAN_LADDER_SIZE];
++ unsigned int max_cpu; /* percentage */
++};
++
++struct uksm_cpu_preset_s uksm_cpu_preset[4] = {
++ { {20, 40, -2500, -10000}, {1000, 500, 200, 50}, 95},
++ { {20, 30, -2500, -10000}, {1000, 500, 400, 100}, 50},
++ { {10, 20, -5000, -10000}, {1500, 1000, 1000, 250}, 20},
++ { {10, 20, 40, 75}, {2000, 1000, 1000, 1000}, 1},
++};
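
Reading the table with the cpu_ratio convention documented in struct scan_rung above (positive: absolute share of each scan period, in 1/TIME_RATIO_SCALE units; negative: share of the user's max CPU percentage), a hypothetical helper makes the decoding explicit:

	/* Hypothetical, not part of the patch: decode one preset entry into
	 * an absolute CPU share in 1/TIME_RATIO_SCALE units. For the "full"
	 * preset (max_cpu = 95): rung 0 has cpu_ratio 20 -> 20/10000 = 0.2%
	 * of each scan period; rung 3 has -10000 -> 10000/10000 * 95% = 95%.
	 */
	static int decode_cpu_ratio(int cpu_ratio, unsigned int max_cpu)
	{
		if (cpu_ratio >= 0)
			return cpu_ratio;
		return -cpu_ratio * (int)max_cpu / 100;
	}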
++
++/* The default value for uksm_ema_page_time if it's not initialized */
++#define UKSM_PAGE_TIME_DEFAULT 500
++
++/* Cost to scan one page, as an exponential moving average, in nsecs */
++static unsigned long uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
++
++/* The exponential moving average alpha weight, in percentage. */
++#define EMA_ALPHA 20
++
++/*
++ * The threshold used to filter out thrashing areas.
++ * If it == 0, filtering is disabled; otherwise it's the percentage upper bound
++ * on the thrashing ratio of all areas. Any area with a higher thrashing ratio
++ * will be considered as having a zero duplication ratio.
++ */
++static unsigned int uksm_thrash_threshold = 50;
++
++/* What dedup ratio is considered abundant */
++static unsigned int uksm_abundant_threshold = 10;
++
++/* All slots having merged pages in this eval round. */
++struct list_head vma_slot_dedup = LIST_HEAD_INIT(vma_slot_dedup);
++
++/* How many times the ksmd has slept since startup */
++static unsigned long long uksm_sleep_times;
++
++#define UKSM_RUN_STOP 0
++#define UKSM_RUN_MERGE 1
++static unsigned int uksm_run = 1;
++
++static DECLARE_WAIT_QUEUE_HEAD(uksm_thread_wait);
++static DEFINE_MUTEX(uksm_thread_mutex);
++
++/*
++ * List vma_slot_new is for newly created vma_slot waiting to be added by
++ * ksmd. If one cannot be added (e.g. because it's too small), it's moved to
++ * vma_slot_noadd. vma_slot_del is the list for vma_slot whose corresponding
++ * VMA has been removed/freed.
++ */
++struct list_head vma_slot_new = LIST_HEAD_INIT(vma_slot_new);
++struct list_head vma_slot_noadd = LIST_HEAD_INIT(vma_slot_noadd);
++struct list_head vma_slot_del = LIST_HEAD_INIT(vma_slot_del);
++static DEFINE_SPINLOCK(vma_slot_list_lock);
++
++/* The unstable tree heads */
++static struct rb_root root_unstable_tree = RB_ROOT;
++
++/*
++ * All tree_nodes are in a list to be freed at once when the unstable tree is
++ * freed after each scan round.
++ */
++static struct list_head unstable_tree_node_list =
++ LIST_HEAD_INIT(unstable_tree_node_list);
++
++/* List contains all stable nodes */
++static struct list_head stable_node_list = LIST_HEAD_INIT(stable_node_list);
++
++/*
++ * When the hash strength is changed, the stable tree must be delta_hashed and
++ * re-structured. We use two sets of the structs below to speed up the
++ * re-structuring of the stable tree.
++ */
++static struct list_head
++stable_tree_node_list[2] = {LIST_HEAD_INIT(stable_tree_node_list[0]),
++ LIST_HEAD_INIT(stable_tree_node_list[1])};
++
++static struct list_head *stable_tree_node_listp = &stable_tree_node_list[0];
++static struct rb_root root_stable_tree[2] = {RB_ROOT, RB_ROOT};
++static struct rb_root *root_stable_treep = &root_stable_tree[0];
++static unsigned long stable_tree_index;
++
++/* The hash strength needed to hash a full page */
++#define HASH_STRENGTH_FULL (PAGE_SIZE / sizeof(u32))
++
++/* The hash strength needed for loop-back hashing */
++#define HASH_STRENGTH_MAX (HASH_STRENGTH_FULL + 10)
++
++/* The random offsets in a page */
++static u32 *random_nums;
++
++/* The hash strength */
++static unsigned long hash_strength = HASH_STRENGTH_FULL >> 4;
++
++/* The delta value each time the hash strength increases or decreases */
++static unsigned long hash_strength_delta;
++#define HASH_STRENGTH_DELTA_MAX 5
++
++/* The time we have saved due to random_sample_hash */
++static u64 rshash_pos;
++
++/* The time we have wasted due to hash collision */
++static u64 rshash_neg;
++
++struct uksm_benefit {
++ u64 pos;
++ u64 neg;
++ u64 scanned;
++ unsigned long base;
++} benefit;
++
++/*
++ * The relative cost of memcmp, compared to 1 time unit of random sample
++ * hash; this value is measured when the ksm module is initialized
++ */
++static unsigned long memcmp_cost;
++
++static unsigned long rshash_neg_cont_zero;
++static unsigned long rshash_cont_obscure;
++
++/* The possible states of hash strength adjustment heuristic */
++enum rshash_states {
++ RSHASH_STILL,
++ RSHASH_TRYUP,
++ RSHASH_TRYDOWN,
++ RSHASH_NEW,
++ RSHASH_PRE_STILL,
++};
++
++/* The possible direction we are about to adjust hash strength */
++enum rshash_direct {
++ GO_UP,
++ GO_DOWN,
++ OBSCURE,
++ STILL,
++};
++
++/* random sampling hash state machine */
++static struct {
++ enum rshash_states state;
++ enum rshash_direct pre_direct;
++ u8 below_count;
++	/* Keep a lookup window of size 5; if above_count/below_count > 3
++	 * within this window, we stop trying.
++ */
++ u8 lookup_window_index;
++ u64 stable_benefit;
++ unsigned long turn_point_down;
++ unsigned long turn_benefit_down;
++ unsigned long turn_point_up;
++ unsigned long turn_benefit_up;
++ unsigned long stable_point;
++} rshash_state;
++
++/*zero page hash table, hash_strength [0 ~ HASH_STRENGTH_MAX]*/
++static u32 *zero_hash_table;
++
++static inline struct node_vma *alloc_node_vma(void)
++{
++ struct node_vma *node_vma;
++
++ node_vma = kmem_cache_zalloc(node_vma_cache, GFP_KERNEL |
++ __GFP_NORETRY | __GFP_NOWARN);
++ if (node_vma) {
++ INIT_HLIST_HEAD(&node_vma->rmap_hlist);
++ INIT_HLIST_NODE(&node_vma->hlist);
++ }
++ return node_vma;
++}
++
++static inline void free_node_vma(struct node_vma *node_vma)
++{
++ kmem_cache_free(node_vma_cache, node_vma);
++}
++
++
++static inline struct vma_slot *alloc_vma_slot(void)
++{
++ struct vma_slot *slot;
++
++ /*
++	 * In case uksm is not initialized by now.
++	 * TODO: we need to consider the call site of uksm_init() in the future.
++ */
++ if (!vma_slot_cache)
++ return NULL;
++
++ slot = kmem_cache_zalloc(vma_slot_cache, GFP_KERNEL |
++ __GFP_NORETRY | __GFP_NOWARN);
++ if (slot) {
++ INIT_LIST_HEAD(&slot->slot_list);
++ INIT_LIST_HEAD(&slot->dedup_list);
++ slot->flags |= UKSM_SLOT_NEED_RERAND;
++ }
++ return slot;
++}
++
++static inline void free_vma_slot(struct vma_slot *vma_slot)
++{
++ kmem_cache_free(vma_slot_cache, vma_slot);
++}
++
++
++
++static inline struct rmap_item *alloc_rmap_item(void)
++{
++ struct rmap_item *rmap_item;
++
++ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
++ __GFP_NORETRY | __GFP_NOWARN);
++ if (rmap_item) {
++ /* bug on lowest bit is not clear for flag use */
++ BUG_ON(is_addr(rmap_item));
++ }
++ return rmap_item;
++}
++
++static inline void free_rmap_item(struct rmap_item *rmap_item)
++{
++ rmap_item->slot = NULL; /* debug safety */
++ kmem_cache_free(rmap_item_cache, rmap_item);
++}
++
++static inline struct stable_node *alloc_stable_node(void)
++{
++ struct stable_node *node;
++
++ node = kmem_cache_alloc(stable_node_cache, GFP_KERNEL |
++ __GFP_NORETRY | __GFP_NOWARN);
++ if (!node)
++ return NULL;
++
++ INIT_HLIST_HEAD(&node->hlist);
++ list_add(&node->all_list, &stable_node_list);
++ return node;
++}
++
++static inline void free_stable_node(struct stable_node *stable_node)
++{
++ list_del(&stable_node->all_list);
++ kmem_cache_free(stable_node_cache, stable_node);
++}
++
++static inline struct tree_node *alloc_tree_node(struct list_head *list)
++{
++ struct tree_node *node;
++
++ node = kmem_cache_zalloc(tree_node_cache, GFP_KERNEL |
++ __GFP_NORETRY | __GFP_NOWARN);
++ if (!node)
++ return NULL;
++
++ list_add(&node->all_list, list);
++ return node;
++}
++
++static inline void free_tree_node(struct tree_node *node)
++{
++ list_del(&node->all_list);
++ kmem_cache_free(tree_node_cache, node);
++}
++
++static void uksm_drop_anon_vma(struct rmap_item *rmap_item)
++{
++ struct anon_vma *anon_vma = rmap_item->anon_vma;
++
++ put_anon_vma(anon_vma);
++}
++
++
++/**
++ * Remove a stable node from stable_tree, may unlink from its tree_node and
++ * may remove its parent tree_node if no other stable node is pending.
++ *
++ * @stable_node: The node to be removed
++ * @unlink_rb: Will this node be unlinked from the rbtree?
++ * @remove_tree_node: Will its tree_node be removed if empty?
++ */
++static void remove_node_from_stable_tree(struct stable_node *stable_node,
++ int unlink_rb, int remove_tree_node)
++{
++ struct node_vma *node_vma;
++ struct rmap_item *rmap_item;
++ struct hlist_node *n;
++
++ if (!hlist_empty(&stable_node->hlist)) {
++ hlist_for_each_entry_safe(node_vma, n,
++ &stable_node->hlist, hlist) {
++ hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) {
++ uksm_pages_sharing--;
++
++ uksm_drop_anon_vma(rmap_item);
++ rmap_item->address &= PAGE_MASK;
++ }
++ free_node_vma(node_vma);
++ cond_resched();
++ }
++
++ /* the last one is counted as shared */
++ uksm_pages_shared--;
++ uksm_pages_sharing++;
++ }
++
++ if (stable_node->tree_node && unlink_rb) {
++ rb_erase(&stable_node->node,
++ &stable_node->tree_node->sub_root);
++
++ if (RB_EMPTY_ROOT(&stable_node->tree_node->sub_root) &&
++ remove_tree_node) {
++ rb_erase(&stable_node->tree_node->node,
++ root_stable_treep);
++ free_tree_node(stable_node->tree_node);
++ } else {
++ stable_node->tree_node->count--;
++ }
++ }
++
++ free_stable_node(stable_node);
++}
++
++
++/*
++ * get_uksm_page: checks if the page indicated by the stable node
++ * is still its ksm page, despite having held no reference to it.
++ * In which case we can trust the content of the page, and it
++ * returns the gotten page; but if the page has now been zapped,
++ * remove the stale node from the stable tree and return NULL.
++ *
++ * You would expect the stable_node to hold a reference to the ksm page.
++ * But if it increments the page's count, swapping out has to wait for
++ * ksmd to come around again before it can free the page, which may take
++ * seconds or even minutes: much too unresponsive. So instead we use a
++ * "keyhole reference": access to the ksm page from the stable node peeps
++ * out through its keyhole to see if that page still holds the right key,
++ * pointing back to this stable node. This relies on freeing a PageAnon
++ * page to reset its page->mapping to NULL, and relies on no other use of
++ * a page to put something that might look like our key in page->mapping.
++ *
++ * include/linux/pagemap.h page_cache_get_speculative() is a good reference,
++ * but this is different - made simpler by uksm_thread_mutex being held, but
++ * interesting for assuming that no other use of the struct page could ever
++ * put our expected_mapping into page->mapping (or a field of the union which
++ * coincides with page->mapping). The RCU calls are not for KSM at all, but
++ * to keep the page_count protocol described with page_cache_get_speculative.
++ *
++ * Note: it is possible that get_uksm_page() will return NULL one moment,
++ * then page the next, if the page is in between page_freeze_refs() and
++ * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
++ * is on its way to being freed; but it is an anomaly to bear in mind.
++ *
++ * @unlink_rb: whether removing this node should first unlink it from
++ * its rbtree. stable_node_reinsert will prevent this when restructuring the
++ * node from its old tree.
++ *
++ * @remove_tree_node: if this is the last one in its tree_node, will the
++ * tree_node be freed? If we are inserting a stable node, this tree_node may
++ * be reused, so don't free it.
++ */
++static struct page *get_uksm_page(struct stable_node *stable_node,
++ int unlink_rb, int remove_tree_node)
++{
++ struct page *page;
++ void *expected_mapping;
++ unsigned long kpfn;
++
++ expected_mapping = (void *)((unsigned long)stable_node |
++ PAGE_MAPPING_KSM);
++again:
++ kpfn = READ_ONCE(stable_node->kpfn);
++ page = pfn_to_page(kpfn);
++
++ /*
++ * page is computed from kpfn, so on most architectures reading
++ * page->mapping is naturally ordered after reading node->kpfn,
++ * but on Alpha we need to be more careful.
++ */
++ smp_rmb();
++
++ if (READ_ONCE(page->mapping) != expected_mapping)
++ goto stale;
++
++ /*
++ * We cannot do anything with the page while its refcount is 0.
++ * Usually 0 means free, or tail of a higher-order page: in which
++ * case this node is no longer referenced, and should be freed;
++ * however, it might mean that the page is under page_freeze_refs().
++ * The __remove_mapping() case is easy, again the node is now stale;
++ * but if page is swapcache in migrate_page_move_mapping(), it might
++ * still be our page, in which case it's essential to keep the node.
++ */
++ while (!get_page_unless_zero(page)) {
++ /*
++ * Another check for page->mapping != expected_mapping would
++ * work here too. We have chosen the !PageSwapCache test to
++ * optimize the common case, when the page is or is about to
++ * be freed: PageSwapCache is cleared (under spin_lock_irq)
++ * in the freeze_refs section of __remove_mapping(); but Anon
++ * page->mapping reset to NULL later, in free_pages_prepare().
++ */
++ if (!PageSwapCache(page))
++ goto stale;
++ cpu_relax();
++ }
++
++ if (READ_ONCE(page->mapping) != expected_mapping) {
++ put_page(page);
++ goto stale;
++ }
++
++ lock_page(page);
++ if (READ_ONCE(page->mapping) != expected_mapping) {
++ unlock_page(page);
++ put_page(page);
++ goto stale;
++ }
++ unlock_page(page);
++ return page;
++stale:
++ /*
++ * We come here from above when page->mapping or !PageSwapCache
++ * suggests that the node is stale; but it might be under migration.
++ * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
++ * before checking whether node->kpfn has been changed.
++ */
++ smp_rmb();
++ if (stable_node->kpfn != kpfn)
++ goto again;
++
++ remove_node_from_stable_tree(stable_node, unlink_rb, remove_tree_node);
++
++ return NULL;
++}
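++
++/*
++ * Illustrative calling pattern (a sketch, mirroring the callers below):
++ * pin the stable page, or let the stale node be torn down.
++ *
++ *	page = get_uksm_page(stable_node, 1, 1);
++ *	if (!page)
++ *		return;	/* node was stale and has been removed */
++ *	... use page, pinned by the reference we now hold ...
++ *	put_page(page);
++ */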
++
++/*
++ * Removing rmap_item from stable or unstable tree.
++ * This function will clean the information from the stable/unstable tree.
++ */
++static inline void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
++{
++ if (rmap_item->address & STABLE_FLAG) {
++ struct stable_node *stable_node;
++ struct node_vma *node_vma;
++ struct page *page;
++
++ node_vma = rmap_item->head;
++ stable_node = node_vma->head;
++ page = get_uksm_page(stable_node, 1, 1);
++ if (!page)
++ goto out;
++
++ /*
++ * page lock is needed because it's racing with
++ * try_to_unmap_ksm(), etc.
++ */
++ lock_page(page);
++ hlist_del(&rmap_item->hlist);
++
++ if (hlist_empty(&node_vma->rmap_hlist)) {
++ hlist_del(&node_vma->hlist);
++ free_node_vma(node_vma);
++ }
++ unlock_page(page);
++
++ put_page(page);
++ if (hlist_empty(&stable_node->hlist)) {
++ /* do NOT call remove_node_from_stable_tree() here:
++ * a forked rmap_item may still be outside the
++ * stable tree even after the in-tree rmap_items
++ * have been deleted.
++ */
++ uksm_pages_shared--;
++ } else
++ uksm_pages_sharing--;
++
++
++ uksm_drop_anon_vma(rmap_item);
++ } else if (rmap_item->address & UNSTABLE_FLAG) {
++ if (rmap_item->hash_round == uksm_hash_round) {
++
++ rb_erase(&rmap_item->node,
++ &rmap_item->tree_node->sub_root);
++ if (RB_EMPTY_ROOT(&rmap_item->tree_node->sub_root)) {
++ rb_erase(&rmap_item->tree_node->node,
++ &root_unstable_tree);
++
++ free_tree_node(rmap_item->tree_node);
++ } else
++ rmap_item->tree_node->count--;
++ }
++ uksm_pages_unshared--;
++ }
++
++ rmap_item->address &= PAGE_MASK;
++ rmap_item->hash_max = 0;
++
++out:
++ cond_resched(); /* we're called from many long loops */
++}
++
++static inline int slot_in_uksm(struct vma_slot *slot)
++{
++ return list_empty(&slot->slot_list);
++}
++
++/*
++ * Test if the mm is exiting
++ */
++static inline bool uksm_test_exit(struct mm_struct *mm)
++{
++ return atomic_read(&mm->mm_users) == 0;
++}
++
++static inline unsigned long vma_pool_size(struct vma_slot *slot)
++{
++ return round_up(sizeof(struct rmap_list_entry) * slot->pages,
++ PAGE_SIZE) >> PAGE_SHIFT;
++}
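++
++/*
++ * Worked example (assuming a 4 KiB PAGE_SIZE and an 8-byte
++ * struct rmap_list_entry, i.e. one pointer-sized union): a slot covering
++ * 1024 pages needs round_up(8 * 1024, 4096) >> PAGE_SHIFT = 2 pool pages
++ * for its rmap_list entries.
++ */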
++
++#define CAN_OVERFLOW_U64(x, delta) (U64_MAX - (x) < (delta))
++
++/* must be done with sem locked */
++static int slot_pool_alloc(struct vma_slot *slot)
++{
++ unsigned long pool_size;
++
++ if (slot->rmap_list_pool)
++ return 0;
++
++ pool_size = vma_pool_size(slot);
++ slot->rmap_list_pool = kcalloc(pool_size, sizeof(struct page *),
++ GFP_KERNEL);
++ if (!slot->rmap_list_pool)
++ return -ENOMEM;
++
++ slot->pool_counts = kcalloc(pool_size, sizeof(unsigned int),
++ GFP_KERNEL);
++ if (!slot->pool_counts) {
++ kfree(slot->rmap_list_pool);
++ return -ENOMEM;
++ }
++
++ slot->pool_size = pool_size;
++ BUG_ON(CAN_OVERFLOW_U64(uksm_pages_total, slot->pages));
++ slot->flags |= UKSM_SLOT_IN_UKSM;
++ uksm_pages_total += slot->pages;
++
++ return 0;
++}
++
++/*
++ * Called after vma is unlinked from its mm
++ */
++void uksm_remove_vma(struct vm_area_struct *vma)
++{
++ struct vma_slot *slot;
++
++ if (!vma->uksm_vma_slot)
++ return;
++
++ spin_lock(&vma_slot_list_lock);
++ slot = vma->uksm_vma_slot;
++ if (!slot)
++ goto out;
++
++ if (slot_in_uksm(slot)) {
++ /**
++ * This slot has been added by ksmd, so move to the del list
++ * waiting ksmd to free it.
++ */
++ list_add_tail(&slot->slot_list, &vma_slot_del);
++ } else {
++ /**
++ * It's still on new list. It's ok to free slot directly.
++ */
++ list_del(&slot->slot_list);
++ free_vma_slot(slot);
++ }
++out:
++ vma->uksm_vma_slot = NULL;
++ spin_unlock(&vma_slot_list_lock);
++}
++
++/**
++ * Need to do two things:
++ * 1. check if the slot was moved to the del list
++ * 2. make sure the mmap_sem is manipulated while the vma is still valid.
++ *
++ * One concern here is that, in some cases, this may further serialize
++ * waiters on vma_slot_list_lock through some sem->wait_lock; can this
++ * really be expensive?
++ *
++ *
++ * @return
++ * 0: if successfully locked mmap_sem
++ * -ENOENT: this slot was moved to del list
++ * -EBUSY: vma lock failed
++ */
++static int try_down_read_slot_mmap_sem(struct vma_slot *slot)
++{
++ struct vm_area_struct *vma;
++ struct mm_struct *mm;
++ struct rw_semaphore *sem;
++
++ spin_lock(&vma_slot_list_lock);
++
++ /* The slot_list was removed from the new list and re-inited when it
++ * entered uksm_list. If it is not empty now, it must have been moved
++ * to the del list.
++ */
++ if (!slot_in_uksm(slot)) {
++ spin_unlock(&vma_slot_list_lock);
++ return -ENOENT;
++ }
++
++ BUG_ON(slot->pages != vma_pages(slot->vma));
++ /* Ok, vma still valid */
++ vma = slot->vma;
++ mm = vma->vm_mm;
++ sem = &mm->mmap_lock;
++
++ if (uksm_test_exit(mm)) {
++ spin_unlock(&vma_slot_list_lock);
++ return -ENOENT;
++ }
++
++ if (down_read_trylock(sem)) {
++ spin_unlock(&vma_slot_list_lock);
++ if (slot_pool_alloc(slot)) {
++ uksm_remove_vma(vma);
++ up_read(sem);
++ return -ENOENT;
++ }
++ return 0;
++ }
++
++ spin_unlock(&vma_slot_list_lock);
++ return -EBUSY;
++}
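++
++/*
++ * Illustrative caller pattern (a sketch; the real callers are in the
++ * scanning code further below):
++ *
++ *	err = try_down_read_slot_mmap_sem(slot);
++ *	if (err == -ENOENT)
++ *		... slot is gone, skip it ...
++ *	else if (err == -EBUSY)
++ *		... mmap_sem contended, retry later ...
++ *	else
++ *		... scan, then mmap_read_unlock(slot->vma->vm_mm) ...
++ */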
++
++static inline unsigned long
++vma_page_address(struct page *page, struct vm_area_struct *vma)
++{
++ pgoff_t pgoff = page->index;
++ unsigned long address;
++
++ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
++ /* page should be within @vma mapping range */
++ return -EFAULT;
++ }
++ return address;
++}
++
++
++/* return 0 on success with the item's mmap_sem locked */
++static inline int get_mergeable_page_lock_mmap(struct rmap_item *item)
++{
++ struct mm_struct *mm;
++ struct vma_slot *slot = item->slot;
++ int err = -EINVAL;
++
++ struct page *page;
++
++ /*
++ * try_down_read_slot_mmap_sem() returns non-zero if the slot
++ * has been removed by uksm_remove_vma().
++ */
++ if (try_down_read_slot_mmap_sem(slot))
++ return -EBUSY;
++
++ mm = slot->vma->vm_mm;
++
++ if (uksm_test_exit(mm))
++ goto failout_up;
++
++ page = item->page;
++ rcu_read_lock();
++ if (!get_page_unless_zero(page)) {
++ rcu_read_unlock();
++ goto failout_up;
++ }
++
++ /* No need to consider huge page here. */
++ if (item->slot->vma->anon_vma != page_anon_vma(page) ||
++ vma_page_address(page, item->slot->vma) != get_rmap_addr(item)) {
++ /*
++ * TODO:
++ * should we release this item because of its stale page
++ * mapping?
++ */
++ put_page(page);
++ rcu_read_unlock();
++ goto failout_up;
++ }
++ rcu_read_unlock();
++ return 0;
++
++failout_up:
++ mmap_read_unlock(mm);
++ return err;
++}
++
++/*
++ * What kind of VMA is considered ?
++ */
++static inline int vma_can_enter(struct vm_area_struct *vma)
++{
++ return uksm_flags_can_scan(vma->vm_flags);
++}
++
++/*
++ * Called whenever a fresh new vma is created. A new vma_slot
++ * is created and inserted into a global list. Must be called
++ * after the vma is inserted into its mm.
++ */
++void uksm_vma_add_new(struct vm_area_struct *vma)
++{
++ struct vma_slot *slot;
++
++ if (!vma_can_enter(vma)) {
++ vma->uksm_vma_slot = NULL;
++ return;
++ }
++
++ slot = alloc_vma_slot();
++ if (!slot) {
++ vma->uksm_vma_slot = NULL;
++ return;
++ }
++
++ vma->uksm_vma_slot = slot;
++ vma->vm_flags |= VM_MERGEABLE;
++ slot->vma = vma;
++ slot->mm = vma->vm_mm;
++ slot->ctime_j = jiffies;
++ slot->pages = vma_pages(vma);
++ spin_lock(&vma_slot_list_lock);
++ list_add_tail(&slot->slot_list, &vma_slot_new);
++ spin_unlock(&vma_slot_list_lock);
++}
++
++/* shift amounts; intended to satisfy 32/3 < shift < 32/2 */
++#define shiftl 8
++#define shiftr 12
++
++#define HASH_FROM_TO(from, to) \
++for (index = from; index < to; index++) { \
++ pos = random_nums[index]; \
++ hash += key[pos]; \
++ hash += (hash << shiftl); \
++ hash ^= (hash >> shiftr); \
++}
++
++
++#define HASH_FROM_DOWN_TO(from, to) \
++for (index = from - 1; index >= to; index--) { \
++ hash ^= (hash >> shiftr); \
++ hash ^= (hash >> (shiftr*2)); \
++ hash -= (hash << shiftl); \
++ hash += (hash << (shiftl*2)); \
++ pos = random_nums[index]; \
++ hash -= key[pos]; \
++}
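++
++/*
++ * HASH_FROM_DOWN_TO walks the sample positions in reverse, undoing the
++ * mixing of HASH_FROM_TO step by step: the xor stage is inverted by
++ * xoring with two right shifts, the shift-add stage by a subtract/add
++ * pair, and finally the sampled key word is subtracted again. This is
++ * what lets delta_hash() below move between hash strengths without
++ * rehashing the whole page.
++ */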
++
++/*
++ * The main random sample hash function.
++ */
++static u32 random_sample_hash(void *addr, u32 hash_strength)
++{
++ u32 hash = 0xdeadbeef;
++ int index, pos, loop = hash_strength;
++ u32 *key = (u32 *)addr;
++
++ if (loop > HASH_STRENGTH_FULL)
++ loop = HASH_STRENGTH_FULL;
++
++ HASH_FROM_TO(0, loop);
++
++ if (hash_strength > HASH_STRENGTH_FULL) {
++ loop = hash_strength - HASH_STRENGTH_FULL;
++ HASH_FROM_TO(0, loop);
++ }
++
++ return hash;
++}
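++
++/*
++ * Sampling semantics (assuming random_nums[] holds the sampling
++ * positions, as set up elsewhere in this patch): at hash_strength ==
++ * HASH_STRENGTH_FULL every position is visited exactly once; at
++ * HASH_STRENGTH_FULL + n the first n positions are folded in a second
++ * time, which is how a strength beyond "full" is expressed.
++ */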
++
++
++/**
++ * Used when the hash strength is adjusted.
++ *
++ * @addr The page's virtual address
++ * @from The original hash strength
++ * @to The hash strength changed to
++ * @hash The hash value generated with the "from" hash strength
++ *
++ * return the hash value recalculated at the "to" strength
++ */
++static u32 delta_hash(void *addr, int from, int to, u32 hash)
++{
++ u32 *key = (u32 *)addr;
++ int index, pos; /* make sure they are int type */
++
++ if (to > from) {
++ if (from >= HASH_STRENGTH_FULL) {
++ from -= HASH_STRENGTH_FULL;
++ to -= HASH_STRENGTH_FULL;
++ HASH_FROM_TO(from, to);
++ } else if (to <= HASH_STRENGTH_FULL) {
++ HASH_FROM_TO(from, to);
++ } else {
++ HASH_FROM_TO(from, HASH_STRENGTH_FULL);
++ HASH_FROM_TO(0, to - HASH_STRENGTH_FULL);
++ }
++ } else {
++ if (from <= HASH_STRENGTH_FULL) {
++ HASH_FROM_DOWN_TO(from, to);
++ } else if (to >= HASH_STRENGTH_FULL) {
++ from -= HASH_STRENGTH_FULL;
++ to -= HASH_STRENGTH_FULL;
++ HASH_FROM_DOWN_TO(from, to);
++ } else {
++ HASH_FROM_DOWN_TO(from - HASH_STRENGTH_FULL, 0);
++ HASH_FROM_DOWN_TO(HASH_STRENGTH_FULL, to);
++ }
++ }
++
++ return hash;
++}
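++
++/*
++ * Example (a sketch with made-up strengths): if a page was hashed at
++ * strength 100 giving h, delta_hash(addr, 100, 120, h) only folds in
++ * the 20 extra positions, and delta_hash(addr, 120, 100, h') unwinds
++ * them, instead of redoing the full 100 or 120 rounds from scratch.
++ */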
++
++/**
++ *
++ * Called when: rshash_pos or rshash_neg is about to overflow or a scan round
++ * has finished.
++ *
++ * return 0 if no page has been scanned since last call, 1 otherwise.
++ */
++static inline int encode_benefit(void)
++{
++ u64 scanned_delta, pos_delta, neg_delta;
++ unsigned long base = benefit.base;
++
++ scanned_delta = uksm_pages_scanned - uksm_pages_scanned_last;
++
++ if (!scanned_delta)
++ return 0;
++
++ scanned_delta >>= base;
++ pos_delta = rshash_pos >> base;
++ neg_delta = rshash_neg >> base;
++
++ if (CAN_OVERFLOW_U64(benefit.pos, pos_delta) ||
++ CAN_OVERFLOW_U64(benefit.neg, neg_delta) ||
++ CAN_OVERFLOW_U64(benefit.scanned, scanned_delta)) {
++ benefit.scanned >>= 1;
++ benefit.neg >>= 1;
++ benefit.pos >>= 1;
++ benefit.base++;
++ scanned_delta >>= 1;
++ pos_delta >>= 1;
++ neg_delta >>= 1;
++ }
++
++ benefit.pos += pos_delta;
++ benefit.neg += neg_delta;
++ benefit.scanned += scanned_delta;
++
++ BUG_ON(!benefit.scanned);
++
++ rshash_pos = rshash_neg = 0;
++ uksm_pages_scanned_last = uksm_pages_scanned;
++
++ return 1;
++}
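++
++/*
++ * The benefit accumulators are fixed-point values scaled down by
++ * 2^benefit.base: whenever an addition would overflow a u64, every
++ * accumulator is halved and base is bumped, so the effective count is
++ * approximately (value << base). Ratios such as pos/scanned are
++ * unaffected because both terms carry the same base.
++ */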
++
++static inline void reset_benefit(void)
++{
++ benefit.pos = 0;
++ benefit.neg = 0;
++ benefit.base = 0;
++ benefit.scanned = 0;
++}
++
++static inline void inc_rshash_pos(unsigned long delta)
++{
++ if (CAN_OVERFLOW_U64(rshash_pos, delta))
++ encode_benefit();
++
++ rshash_pos += delta;
++}
++
++static inline void inc_rshash_neg(unsigned long delta)
++{
++ if (CAN_OVERFLOW_U64(rshash_neg, delta))
++ encode_benefit();
++
++ rshash_neg += delta;
++}
++
++
++static inline u32 page_hash(struct page *page, unsigned long hash_strength,
++ int cost_accounting)
++{
++ u32 val;
++ unsigned long delta;
++
++ void *addr = kmap_atomic(page);
++
++ val = random_sample_hash(addr, hash_strength);
++ kunmap_atomic(addr);
++
++ if (cost_accounting) {
++ if (hash_strength < HASH_STRENGTH_FULL)
++ delta = HASH_STRENGTH_FULL - hash_strength;
++ else
++ delta = 0;
++
++ inc_rshash_pos(delta);
++ }
++
++ return val;
++}
++
++static int memcmp_pages_with_cost(struct page *page1, struct page *page2,
++ int cost_accounting)
++{
++ char *addr1, *addr2;
++ int ret;
++
++ addr1 = kmap_atomic(page1);
++ addr2 = kmap_atomic(page2);
++ ret = memcmp(addr1, addr2, PAGE_SIZE);
++ kunmap_atomic(addr2);
++ kunmap_atomic(addr1);
++
++ if (cost_accounting)
++ inc_rshash_neg(memcmp_cost);
++
++ return ret;
++}
++
++static inline int pages_identical_with_cost(struct page *page1, struct page *page2)
++{
++ return !memcmp_pages_with_cost(page1, page2, 0);
++}
++
++static inline int is_page_full_zero(struct page *page)
++{
++ char *addr;
++ int ret;
++
++ addr = kmap_atomic(page);
++ ret = is_full_zero(addr, PAGE_SIZE);
++ kunmap_atomic(addr);
++
++ return ret;
++}
++
++static int write_protect_page(struct vm_area_struct *vma, struct page *page,
++ pte_t *orig_pte, pte_t *old_pte)
++{
++ struct mm_struct *mm = vma->vm_mm;
++ struct page_vma_mapped_walk pvmw = {
++ .page = page,
++ .vma = vma,
++ };
++ struct mmu_notifier_range range;
++ int swapped;
++ int err = -EFAULT;
++
++ pvmw.address = page_address_in_vma(page, vma);
++ if (pvmw.address == -EFAULT)
++ goto out;
++
++ BUG_ON(PageTransCompound(page));
++
++ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, pvmw.address,
++ pvmw.address + PAGE_SIZE);
++ mmu_notifier_invalidate_range_start(&range);
++
++ if (!page_vma_mapped_walk(&pvmw))
++ goto out_mn;
++ if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
++ goto out_unlock;
++
++ if (old_pte)
++ *old_pte = *pvmw.pte;
++
++ if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
++ (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) || mm_tlb_flush_pending(mm)) {
++ pte_t entry;
++
++ swapped = PageSwapCache(page);
++ flush_cache_page(vma, pvmw.address, page_to_pfn(page));
++ /*
++ * Ok this is tricky, when get_user_pages_fast() runs it doesn't
++ * take any lock, therefore the check that we are going to make
++ * with the pagecount against the mapcount is racy and
++ * O_DIRECT can happen right after the check.
++ * So we clear the pte and flush the tlb before the check;
++ * this assures us that no O_DIRECT can happen after the check
++ * or in the middle of the check.
++ */
++ entry = ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
++ /*
++ * Check that no O_DIRECT or similar I/O is in progress on the
++ * page
++ */
++ if (page_mapcount(page) + 1 + swapped != page_count(page)) {
++ set_pte_at(mm, pvmw.address, pvmw.pte, entry);
++ goto out_unlock;
++ }
++ if (pte_dirty(entry))
++ set_page_dirty(page);
++
++ if (pte_protnone(entry))
++ entry = pte_mkclean(pte_clear_savedwrite(entry));
++ else
++ entry = pte_mkclean(pte_wrprotect(entry));
++
++ set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
++ }
++ *orig_pte = *pvmw.pte;
++ err = 0;
++
++out_unlock:
++ page_vma_mapped_walk_done(&pvmw);
++out_mn:
++ mmu_notifier_invalidate_range_end(&range);
++out:
++ return err;
++}
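++
++/*
++ * A note on the refcount test above (mirroring the accounting used by
++ * mainline KSM): page_mapcount() contributes one reference per pte
++ * mapping, "+ 1" is the reference our caller already holds on the page,
++ * and "+ swapped" is the swap cache's reference if present. Any surplus
++ * reference must belong to O_DIRECT or a similar user, so the merge is
++ * abandoned and the pte restored.
++ */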
++
++#define MERGE_ERR_PGERR 1 /* the page is invalid, cannot continue */
++#define MERGE_ERR_COLLI 2 /* there is a collision */
++#define MERGE_ERR_COLLI_MAX 3 /* collision at the max hash strength */
++#define MERGE_ERR_CHANGED 4 /* the page has changed since last hash */
++
++
++/**
++ * replace_page - replace page in vma by new ksm page
++ * @vma: vma that holds the pte pointing to page
++ * @page: the page we are replacing by kpage
++ * @kpage: the ksm page we replace page by
++ * @orig_pte: the original value of the pte
++ *
++ * Returns 0 on success, MERGE_ERR_PGERR on failure.
++ */
++static int replace_page(struct vm_area_struct *vma, struct page *page,
++ struct page *kpage, pte_t orig_pte)
++{
++ struct mm_struct *mm = vma->vm_mm;
++ struct mmu_notifier_range range;
++ pgd_t *pgd;
++ p4d_t *p4d;
++ pud_t *pud;
++ pmd_t *pmd;
++ pte_t *ptep;
++ spinlock_t *ptl;
++ pte_t entry;
++
++ unsigned long addr;
++ int err = MERGE_ERR_PGERR;
++
++ addr = page_address_in_vma(page, vma);
++ if (addr == -EFAULT)
++ goto out;
++
++ pgd = pgd_offset(mm, addr);
++ if (!pgd_present(*pgd))
++ goto out;
++
++ p4d = p4d_offset(pgd, addr);
++ pud = pud_offset(p4d, addr);
++ if (!pud_present(*pud))
++ goto out;
++
++ pmd = pmd_offset(pud, addr);
++ BUG_ON(pmd_trans_huge(*pmd));
++ if (!pmd_present(*pmd))
++ goto out;
++
++ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
++ addr + PAGE_SIZE);
++ mmu_notifier_invalidate_range_start(&range);
++
++ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
++ if (!pte_same(*ptep, orig_pte)) {
++ pte_unmap_unlock(ptep, ptl);
++ goto out_mn;
++ }
++
++ flush_cache_page(vma, addr, pte_pfn(*ptep));
++ ptep_clear_flush_notify(vma, addr, ptep);
++ entry = mk_pte(kpage, vma->vm_page_prot);
++
++ /* special treatment is needed for zero_page */
++ if ((page_to_pfn(kpage) == uksm_zero_pfn) ||
++ (page_to_pfn(kpage) == zero_pfn)) {
++ entry = pte_mkspecial(entry);
++ dec_mm_counter(mm, MM_ANONPAGES);
++ inc_zone_page_state(page, NR_UKSM_ZERO_PAGES);
++ } else {
++ get_page(kpage);
++ page_add_anon_rmap(kpage, vma, addr, false);
++ }
++
++ set_pte_at_notify(mm, addr, ptep, entry);
++
++ page_remove_rmap(page, false);
++ if (!page_mapped(page))
++ try_to_free_swap(page);
++ put_page(page);
++
++ pte_unmap_unlock(ptep, ptl);
++ err = 0;
++out_mn:
++ mmu_notifier_invalidate_range_end(&range);
++out:
++ return err;
++}
++
++
++/**
++ * Fully hash a page with HASH_STRENGTH_MAX and return a non-zero hash value.
++ * The zero hash value at HASH_STRENGTH_MAX is used to indicate that its
++ * hash_max member has not been calculated.
++ *
++ * @page The page needs to be hashed
++ * @hash_old The hash value calculated with current hash strength
++ *
++ * return the new hash value calculated at HASH_STRENGTH_MAX
++ */
++static inline u32 page_hash_max(struct page *page, u32 hash_old)
++{
++ u32 hash_max = 0;
++ void *addr;
++
++ addr = kmap_atomic(page);
++ hash_max = delta_hash(addr, hash_strength,
++ HASH_STRENGTH_MAX, hash_old);
++
++ kunmap_atomic(addr);
++
++ if (!hash_max)
++ hash_max = 1;
++
++ inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength);
++ return hash_max;
++}
++
++/*
++ * We compare the hash again, to ensure that it is really a hash collision
++ * instead of being caused by page write.
++ */
++static inline int check_collision(struct rmap_item *rmap_item,
++ u32 hash)
++{
++ int err;
++ struct page *page = rmap_item->page;
++
++ /* If this rmap_item has already been hash_maxed, then the collision
++ * must have appeared in the second-level rbtree search. In this case we
++ * check if its hash_max value has been changed. Otherwise, the collision
++ * happened in the first-level rbtree search, so we check against its
++ * current hash value.
++ */
++ if (rmap_item->hash_max) {
++ inc_rshash_neg(memcmp_cost);
++ inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength);
++
++ if (rmap_item->hash_max == page_hash_max(page, hash))
++ err = MERGE_ERR_COLLI;
++ else
++ err = MERGE_ERR_CHANGED;
++ } else {
++ inc_rshash_neg(memcmp_cost + hash_strength);
++
++ if (page_hash(page, hash_strength, 0) == hash)
++ err = MERGE_ERR_COLLI;
++ else
++ err = MERGE_ERR_CHANGED;
++ }
++
++ return err;
++}
++
++/**
++ * Try to merge a rmap_item.page with a kpage in stable node. kpage must
++ * already be a ksm page.
++ *
++ * @return 0 if the pages were merged, -EFAULT otherwise.
++ */
++static int try_to_merge_with_uksm_page(struct rmap_item *rmap_item,
++ struct page *kpage, u32 hash)
++{
++ struct vm_area_struct *vma = rmap_item->slot->vma;
++ struct mm_struct *mm = vma->vm_mm;
++ pte_t orig_pte = __pte(0);
++ int err = MERGE_ERR_PGERR;
++ struct page *page;
++
++ if (uksm_test_exit(mm))
++ goto out;
++
++ page = rmap_item->page;
++
++ if (page == kpage) { /* ksm page forked */
++ err = 0;
++ goto out;
++ }
++
++ /*
++ * We need the page lock to read a stable PageSwapCache in
++ * write_protect_page(). We use trylock_page() instead of
++ * lock_page() because we don't want to wait here - we
++ * prefer to continue scanning and merging different pages,
++ * then come back to this page when it is unlocked.
++ */
++ if (!trylock_page(page))
++ goto out;
++
++ if (!PageAnon(page) || !PageKsm(kpage))
++ goto out_unlock;
++
++ if (PageTransCompound(page)) {
++ err = split_huge_page(page);
++ if (err)
++ goto out_unlock;
++ }
++
++ /*
++ * If this anonymous page is mapped only here, its pte may need
++ * to be write-protected. If it's mapped elsewhere, all of its
++ * ptes are necessarily already write-protected. But in either
++ * case, we need to lock and check page_count is not raised.
++ */
++ if (write_protect_page(vma, page, &orig_pte, NULL) == 0) {
++ if (pages_identical_with_cost(page, kpage))
++ err = replace_page(vma, page, kpage, orig_pte);
++ else
++ err = check_collision(rmap_item, hash);
++ }
++
++ if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
++ munlock_vma_page(page);
++ if (!PageMlocked(kpage)) {
++ unlock_page(page);
++ lock_page(kpage);
++ mlock_vma_page(kpage);
++ page = kpage; /* for final unlock */
++ }
++ }
++
++out_unlock:
++ unlock_page(page);
++out:
++ return err;
++}
++
++
++
++/**
++ * If two pages fail to merge in try_to_merge_two_pages, then we have a chance
++ * to restore a page mapping that has been changed in try_to_merge_two_pages.
++ *
++ * @return 0 on success.
++ */
++static int restore_uksm_page_pte(struct vm_area_struct *vma, unsigned long addr,
++ pte_t orig_pte, pte_t wprt_pte)
++{
++ struct mm_struct *mm = vma->vm_mm;
++ pgd_t *pgd;
++ p4d_t *p4d;
++ pud_t *pud;
++ pmd_t *pmd;
++ pte_t *ptep;
++ spinlock_t *ptl;
++
++ int err = -EFAULT;
++
++ pgd = pgd_offset(mm, addr);
++ if (!pgd_present(*pgd))
++ goto out;
++
++ p4d = p4d_offset(pgd, addr);
++ pud = pud_offset(p4d, addr);
++ if (!pud_present(*pud))
++ goto out;
++
++ pmd = pmd_offset(pud, addr);
++ if (!pmd_present(*pmd))
++ goto out;
++
++ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
++ if (!pte_same(*ptep, wprt_pte)) {
++ /* already copied, let it be */
++ pte_unmap_unlock(ptep, ptl);
++ goto out;
++ }
++
++ /*
++ * Good, still here. While we still hold the ksm page, it cannot
++ * return to the free page pool, so there is no way a pte was changed
++ * to another page and then back to this page. And remember that ksm
++ * pages are not reused in do_wp_page(). So it's safe to restore the
++ * original pte.
++ */
++ flush_cache_page(vma, addr, pte_pfn(*ptep));
++ ptep_clear_flush_notify(vma, addr, ptep);
++ set_pte_at_notify(mm, addr, ptep, orig_pte);
++
++ pte_unmap_unlock(ptep, ptl);
++ err = 0;
++out:
++ return err;
++}
++
++/**
++ * try_to_merge_two_pages() - take two identical pages and prepare
++ * them to be merged into one page(rmap_item->page)
++ *
++ * @return 0 if we successfully merged two identical pages into
++ * one ksm page. MERGE_ERR_COLLI if it was only a hash collision
++ * in the rbtree search. MERGE_ERR_CHANGED if the rmap_item has been
++ * changed since it was hashed. MERGE_ERR_PGERR otherwise.
++ *
++ */
++static int try_to_merge_two_pages(struct rmap_item *rmap_item,
++ struct rmap_item *tree_rmap_item,
++ u32 hash)
++{
++ pte_t orig_pte1 = __pte(0), orig_pte2 = __pte(0);
++ pte_t wprt_pte1 = __pte(0), wprt_pte2 = __pte(0);
++ struct vm_area_struct *vma1 = rmap_item->slot->vma;
++ struct vm_area_struct *vma2 = tree_rmap_item->slot->vma;
++ struct page *page = rmap_item->page;
++ struct page *tree_page = tree_rmap_item->page;
++ int err = MERGE_ERR_PGERR;
++ struct address_space *saved_mapping;
++
++
++ if (rmap_item->page == tree_rmap_item->page)
++ goto out;
++
++ if (!trylock_page(page))
++ goto out;
++
++ if (!PageAnon(page))
++ goto out_unlock;
++
++ if (PageTransCompound(page)) {
++ err = split_huge_page(page);
++ if (err)
++ goto out_unlock;
++ }
++
++ if (write_protect_page(vma1, page, &wprt_pte1, &orig_pte1) != 0) {
++ unlock_page(page);
++ goto out;
++ }
++
++ /*
++ * While we hold page lock, upgrade page from
++ * PageAnon+anon_vma to PageKsm+NULL stable_node:
++ * stable_tree_insert() will update stable_node.
++ */
++ saved_mapping = page->mapping;
++ set_page_stable_node(page, NULL);
++ mark_page_accessed(page);
++ if (!PageDirty(page))
++ SetPageDirty(page);
++
++ unlock_page(page);
++
++ if (!trylock_page(tree_page))
++ goto restore_out;
++
++ if (!PageAnon(tree_page)) {
++ unlock_page(tree_page);
++ goto restore_out;
++ }
++
++ if (PageTransCompound(tree_page)) {
++ err = split_huge_page(tree_page);
++ if (err) {
++ unlock_page(tree_page);
++ goto restore_out;
++ }
++ }
++
++ if (write_protect_page(vma2, tree_page, &wprt_pte2, &orig_pte2) != 0) {
++ unlock_page(tree_page);
++ goto restore_out;
++ }
++
++ if (pages_identical_with_cost(page, tree_page)) {
++ err = replace_page(vma2, tree_page, page, wprt_pte2);
++ if (err) {
++ unlock_page(tree_page);
++ goto restore_out;
++ }
++
++ if ((vma2->vm_flags & VM_LOCKED)) {
++ munlock_vma_page(tree_page);
++ if (!PageMlocked(page)) {
++ unlock_page(tree_page);
++ lock_page(page);
++ mlock_vma_page(page);
++ tree_page = page; /* for final unlock */
++ }
++ }
++
++ unlock_page(tree_page);
++
++ goto out; /* success */
++
++ } else {
++ if (tree_rmap_item->hash_max &&
++ tree_rmap_item->hash_max == rmap_item->hash_max) {
++ err = MERGE_ERR_COLLI_MAX;
++ } else if (page_hash(page, hash_strength, 0) ==
++ page_hash(tree_page, hash_strength, 0)) {
++ inc_rshash_neg(memcmp_cost + hash_strength * 2);
++ err = MERGE_ERR_COLLI;
++ } else {
++ err = MERGE_ERR_CHANGED;
++ }
++
++ unlock_page(tree_page);
++ }
++
++restore_out:
++ lock_page(page);
++ if (!restore_uksm_page_pte(vma1, get_rmap_addr(rmap_item),
++ orig_pte1, wprt_pte1))
++ page->mapping = saved_mapping;
++
++out_unlock:
++ unlock_page(page);
++out:
++ return err;
++}
++
++static inline int hash_cmp(u32 new_val, u32 node_val)
++{
++ if (new_val > node_val)
++ return 1;
++ else if (new_val < node_val)
++ return -1;
++ else
++ return 0;
++}
++
++static inline u32 rmap_item_hash_max(struct rmap_item *item, u32 hash)
++{
++ u32 hash_max = item->hash_max;
++
++ if (!hash_max) {
++ hash_max = page_hash_max(item->page, hash);
++
++ item->hash_max = hash_max;
++ }
++
++ return hash_max;
++}
++
++
++
++/**
++ * stable_tree_search() - search the stable tree for a page
++ *
++ * @item: the rmap_item we are comparing with
++ * @hash: the hash value of this item->page already calculated
++ *
++ * @return the page we have found, NULL otherwise. A reference to the
++ * returned page has been taken.
++ */
++static struct page *stable_tree_search(struct rmap_item *item, u32 hash)
++{
++ struct rb_node *node = root_stable_treep->rb_node;
++ struct tree_node *tree_node;
++ unsigned long hash_max;
++ struct page *page = item->page;
++ struct stable_node *stable_node;
++
++ stable_node = page_stable_node(page);
++ if (stable_node) {
++ /* ksm page forked, that is
++ * if (PageKsm(page) && !in_stable_tree(rmap_item))
++ * its reference was actually taken once outside.
++ */
++ get_page(page);
++ return page;
++ }
++
++ while (node) {
++ int cmp;
++
++ tree_node = rb_entry(node, struct tree_node, node);
++
++ cmp = hash_cmp(hash, tree_node->hash);
++
++ if (cmp < 0)
++ node = node->rb_left;
++ else if (cmp > 0)
++ node = node->rb_right;
++ else
++ break;
++ }
++
++ if (!node)
++ return NULL;
++
++ if (tree_node->count == 1) {
++ stable_node = rb_entry(tree_node->sub_root.rb_node,
++ struct stable_node, node);
++ BUG_ON(!stable_node);
++
++ goto get_page_out;
++ }
++
++ /*
++ * ok, we have to search the second
++ * level subtree, hash the page to a
++ * full strength.
++ */
++ node = tree_node->sub_root.rb_node;
++ BUG_ON(!node);
++ hash_max = rmap_item_hash_max(item, hash);
++
++ while (node) {
++ int cmp;
++
++ stable_node = rb_entry(node, struct stable_node, node);
++
++ cmp = hash_cmp(hash_max, stable_node->hash_max);
++
++ if (cmp < 0)
++ node = node->rb_left;
++ else if (cmp > 0)
++ node = node->rb_right;
++ else
++ goto get_page_out;
++ }
++
++ return NULL;
++
++get_page_out:
++ page = get_uksm_page(stable_node, 1, 1);
++ return page;
++}
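++
++/*
++ * Layout sketch of the two-level stable tree searched above:
++ *
++ *	root_stable_treep	(keyed by hash at the current strength)
++ *	  tree_node, count == 1	-> a single stable_node, no sub-search
++ *	  tree_node, count > 1	-> sub_root keyed by hash_max, one
++ *				   stable_node per leaf
++ *
++ * The second level only materializes when pages collide at the current
++ * hash strength, and is searched with the full-strength hash.
++ */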
++
++static int try_merge_rmap_item(struct rmap_item *item,
++ struct page *kpage,
++ struct page *tree_page)
++{
++ struct vm_area_struct *vma = item->slot->vma;
++ struct page_vma_mapped_walk pvmw = {
++ .page = kpage,
++ .vma = vma,
++ };
++
++ pvmw.address = get_rmap_addr(item);
++ if (!page_vma_mapped_walk(&pvmw))
++ return 0;
++
++ if (pte_write(*pvmw.pte)) {
++ /* has changed, abort! */
++ page_vma_mapped_walk_done(&pvmw);
++ return 0;
++ }
++
++ get_page(tree_page);
++ page_add_anon_rmap(tree_page, vma, pvmw.address, false);
++
++ flush_cache_page(vma, pvmw.address, page_to_pfn(kpage));
++ ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
++ set_pte_at_notify(vma->vm_mm, pvmw.address, pvmw.pte,
++ mk_pte(tree_page, vma->vm_page_prot));
++
++ page_remove_rmap(kpage, false);
++ put_page(kpage);
++
++ page_vma_mapped_walk_done(&pvmw);
++
++ return 1;
++}
++
++/**
++ * try_merge_with_stable() - when two rmap_items need to be inserted
++ * into the stable tree and the page was found to be identical to a stable
++ * ksm page, this is the last chance we can merge them into one.
++ *
++ * @item1: the rmap_item holding the page we wanted to insert
++ * into the stable tree.
++ * @item2: the other rmap_item we found in the unstable tree search
++ * @kpage: the page currently mapped by the two rmap_items
++ * @tree_page: the page we found identical in a stable tree node
++ * @success1: returns whether item1 was successfully merged
++ * @success2: returns whether item2 was successfully merged
++ */
++static void try_merge_with_stable(struct rmap_item *item1,
++ struct rmap_item *item2,
++ struct page **kpage,
++ struct page *tree_page,
++ int *success1, int *success2)
++{
++ struct vm_area_struct *vma1 = item1->slot->vma;
++ struct vm_area_struct *vma2 = item2->slot->vma;
++ *success1 = 0;
++ *success2 = 0;
++
++ if (unlikely(*kpage == tree_page)) {
++ /* I don't think this can really happen */
++ pr_warn("UKSM: unexpected condition detected in "
++ "%s -- *kpage == tree_page !\n", __func__);
++ *success1 = 1;
++ *success2 = 1;
++ return;
++ }
++
++ if (!PageAnon(*kpage) || !PageKsm(*kpage))
++ goto failed;
++
++ if (!trylock_page(tree_page))
++ goto failed;
++
++ /* If the oldpage is still ksm, still pointed
++ * to in the right place, and still write protected,
++ * we are confident it has not changed; no need to
++ * memcmp anymore.
++ * Beware: we cannot take nested pte locks,
++ * deadlock risk.
++ */
++ if (!try_merge_rmap_item(item1, *kpage, tree_page))
++ goto unlock_failed;
++
++ /* ok, then vma2, remind that pte1 already set */
++ if (!try_merge_rmap_item(item2, *kpage, tree_page))
++ goto success_1;
++
++ *success2 = 1;
++success_1:
++ *success1 = 1;
++
++
++ if ((*success1 && vma1->vm_flags & VM_LOCKED) ||
++ (*success2 && vma2->vm_flags & VM_LOCKED)) {
++ munlock_vma_page(*kpage);
++ if (!PageMlocked(tree_page))
++ mlock_vma_page(tree_page);
++ }
++
++ /*
++ * We do not need oldpage any more in the caller, so we can drop its
++ * lock now.
++ */
++ unlock_page(*kpage);
++ *kpage = tree_page; /* Get unlocked outside. */
++ return;
++
++unlock_failed:
++ unlock_page(tree_page);
++failed:
++ return;
++}
++
++static inline void stable_node_hash_max(struct stable_node *node,
++ struct page *page, u32 hash)
++{
++ u32 hash_max = node->hash_max;
++
++ if (!hash_max) {
++ hash_max = page_hash_max(page, hash);
++ node->hash_max = hash_max;
++ }
++}
++
++static inline
++struct stable_node *new_stable_node(struct tree_node *tree_node,
++ struct page *kpage, u32 hash_max)
++{
++ struct stable_node *new_stable_node;
++
++ new_stable_node = alloc_stable_node();
++ if (!new_stable_node)
++ return NULL;
++
++ new_stable_node->kpfn = page_to_pfn(kpage);
++ new_stable_node->hash_max = hash_max;
++ new_stable_node->tree_node = tree_node;
++ set_page_stable_node(kpage, new_stable_node);
++
++ return new_stable_node;
++}
++
++static inline
++struct stable_node *first_level_insert(struct tree_node *tree_node,
++ struct rmap_item *rmap_item,
++ struct rmap_item *tree_rmap_item,
++ struct page **kpage, u32 hash,
++ int *success1, int *success2)
++{
++ int cmp;
++ struct page *tree_page;
++ u32 hash_max = 0;
++ struct stable_node *stable_node, *new_snode;
++ struct rb_node *parent = NULL, **new;
++
++ /* this tree node contains no sub-tree yet */
++ stable_node = rb_entry(tree_node->sub_root.rb_node,
++ struct stable_node, node);
++
++ tree_page = get_uksm_page(stable_node, 1, 0);
++ if (tree_page) {
++ cmp = memcmp_pages_with_cost(*kpage, tree_page, 1);
++ if (!cmp) {
++ try_merge_with_stable(rmap_item, tree_rmap_item, kpage,
++ tree_page, success1, success2);
++ put_page(tree_page);
++ if (!*success1 && !*success2)
++ goto failed;
++
++ return stable_node;
++
++ } else {
++ /*
++ * collision at the first level; try to create a subtree.
++ * A new node needs to be created.
++ */
++ put_page(tree_page);
++
++ stable_node_hash_max(stable_node, tree_page,
++ tree_node->hash);
++ hash_max = rmap_item_hash_max(rmap_item, hash);
++ cmp = hash_cmp(hash_max, stable_node->hash_max);
++
++ parent = &stable_node->node;
++ if (cmp < 0)
++ new = &parent->rb_left;
++ else if (cmp > 0)
++ new = &parent->rb_right;
++ else
++ goto failed;
++ }
++
++ } else {
++ /* the only stable_node was deleted; we reuse its tree_node.
++ */
++ parent = NULL;
++ new = &tree_node->sub_root.rb_node;
++ }
++
++ new_snode = new_stable_node(tree_node, *kpage, hash_max);
++ if (!new_snode)
++ goto failed;
++
++ rb_link_node(&new_snode->node, parent, new);
++ rb_insert_color(&new_snode->node, &tree_node->sub_root);
++ tree_node->count++;
++ *success1 = *success2 = 1;
++
++ return new_snode;
++
++failed:
++ return NULL;
++}
++
++static inline
++struct stable_node *stable_subtree_insert(struct tree_node *tree_node,
++ struct rmap_item *rmap_item,
++ struct rmap_item *tree_rmap_item,
++ struct page **kpage, u32 hash,
++ int *success1, int *success2)
++{
++ struct page *tree_page;
++ u32 hash_max;
++ struct stable_node *stable_node, *new_snode;
++ struct rb_node *parent, **new;
++
++research:
++ parent = NULL;
++ new = &tree_node->sub_root.rb_node;
++ BUG_ON(!*new);
++ hash_max = rmap_item_hash_max(rmap_item, hash);
++ while (*new) {
++ int cmp;
++
++ stable_node = rb_entry(*new, struct stable_node, node);
++
++ cmp = hash_cmp(hash_max, stable_node->hash_max);
++
++ if (cmp < 0) {
++ parent = *new;
++ new = &parent->rb_left;
++ } else if (cmp > 0) {
++ parent = *new;
++ new = &parent->rb_right;
++ } else {
++ tree_page = get_uksm_page(stable_node, 1, 0);
++ if (tree_page) {
++ cmp = memcmp_pages_with_cost(*kpage, tree_page, 1);
++ if (!cmp) {
++ try_merge_with_stable(rmap_item,
++ tree_rmap_item, kpage,
++ tree_page, success1, success2);
++
++ put_page(tree_page);
++ if (!*success1 && !*success2)
++ goto failed;
++ /*
++ * successfully merged with a stable
++ * node
++ */
++ return stable_node;
++ } else {
++ put_page(tree_page);
++ goto failed;
++ }
++ } else {
++ /*
++ * the stable node may have been
++ * deleted and the subtree
++ * restructured; we cannot
++ * continue, re-search it.
++ */
++ if (tree_node->count) {
++ goto research;
++ } else {
++ /* reuse the tree node */
++ parent = NULL;
++ new = &tree_node->sub_root.rb_node;
++ }
++ }
++ }
++ }
++
++ new_snode = new_stable_node(tree_node, *kpage, hash_max);
++ if (!new_snode)
++ goto failed;
++
++ rb_link_node(&new_snode->node, parent, new);
++ rb_insert_color(&new_snode->node, &tree_node->sub_root);
++ tree_node->count++;
++ *success1 = *success2 = 1;
++
++ return new_snode;
++
++failed:
++ return NULL;
++}
++
++
++/**
++ * stable_tree_insert() - try to insert a merged page in unstable tree to
++ * the stable tree
++ *
++ * @kpage: the page need to be inserted
++ * @hash: the current hash of this page
++ * @rmap_item: the rmap_item being scanned
++ * @tree_rmap_item: the rmap_item found on unstable tree
++ * @success1: return if rmap_item is merged
++ * @success2: return if tree_rmap_item is merged
++ *
++ * @return the stable_node on stable tree if at least one
++ * rmap_item is inserted into stable tree, NULL
++ * otherwise.
++ */
++static struct stable_node *
++stable_tree_insert(struct page **kpage, u32 hash,
++ struct rmap_item *rmap_item,
++ struct rmap_item *tree_rmap_item,
++ int *success1, int *success2)
++{
++ struct rb_node **new = &root_stable_treep->rb_node;
++ struct rb_node *parent = NULL;
++ struct stable_node *stable_node;
++ struct tree_node *tree_node;
++ u32 hash_max = 0;
++
++ *success1 = *success2 = 0;
++
++ while (*new) {
++ int cmp;
++
++ tree_node = rb_entry(*new, struct tree_node, node);
++
++ cmp = hash_cmp(hash, tree_node->hash);
++
++ if (cmp < 0) {
++ parent = *new;
++ new = &parent->rb_left;
++ } else if (cmp > 0) {
++ parent = *new;
++ new = &parent->rb_right;
++ } else
++ break;
++ }
++
++ if (*new) {
++ if (tree_node->count == 1) {
++ stable_node = first_level_insert(tree_node, rmap_item,
++ tree_rmap_item, kpage,
++ hash, success1, success2);
++ } else {
++ stable_node = stable_subtree_insert(tree_node,
++ rmap_item, tree_rmap_item, kpage,
++ hash, success1, success2);
++ }
++ } else {
++
++ /* no tree node found */
++ tree_node = alloc_tree_node(stable_tree_node_listp);
++ if (!tree_node) {
++ stable_node = NULL;
++ goto out;
++ }
++
++ stable_node = new_stable_node(tree_node, *kpage, hash_max);
++ if (!stable_node) {
++ free_tree_node(tree_node);
++ goto out;
++ }
++
++ tree_node->hash = hash;
++ rb_link_node(&tree_node->node, parent, new);
++ rb_insert_color(&tree_node->node, root_stable_treep);
++ parent = NULL;
++ new = &tree_node->sub_root.rb_node;
++
++ rb_link_node(&stable_node->node, parent, new);
++ rb_insert_color(&stable_node->node, &tree_node->sub_root);
++ tree_node->count++;
++ *success1 = *success2 = 1;
++ }
++
++out:
++ return stable_node;
++}
++
++
++/**
++ * get_tree_rmap_item_page() - try to get the page and lock the mmap_sem
++ *
++ * @return 0 on success, -EBUSY if unable to lock the mmap_sem,
++ * -EINVAL if the page mapping has been changed.
++ */
++static inline int get_tree_rmap_item_page(struct rmap_item *tree_rmap_item)
++{
++ int err;
++
++ err = get_mergeable_page_lock_mmap(tree_rmap_item);
++
++ if (err == -EINVAL) {
++ /* its page map has been changed, remove it */
++ remove_rmap_item_from_tree(tree_rmap_item);
++ }
++
++ /* On success, the page reference is held and mmap_sem is locked now. */
++ return err;
++}
++
++
++/**
++ * unstable_tree_search_insert() - search the unstable tree for an rmap_item
++ * with the same hash value; get its page and trylock the mmap_sem
++ */
++static inline
++struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
++ u32 hash)
++
++{
++ struct rb_node **new = &root_unstable_tree.rb_node;
++ struct rb_node *parent = NULL;
++ struct tree_node *tree_node;
++ u32 hash_max;
++ struct rmap_item *tree_rmap_item;
++
++ while (*new) {
++ int cmp;
++
++ tree_node = rb_entry(*new, struct tree_node, node);
++
++ cmp = hash_cmp(hash, tree_node->hash);
++
++ if (cmp < 0) {
++ parent = *new;
++ new = &parent->rb_left;
++ } else if (cmp > 0) {
++ parent = *new;
++ new = &parent->rb_right;
++ } else
++ break;
++ }
++
++ if (*new) {
++ /* got the tree_node */
++ if (tree_node->count == 1) {
++ tree_rmap_item = rb_entry(tree_node->sub_root.rb_node,
++ struct rmap_item, node);
++ BUG_ON(!tree_rmap_item);
++
++ goto get_page_out;
++ }
++
++ /* well, search the collision subtree */
++ new = &tree_node->sub_root.rb_node;
++ BUG_ON(!*new);
++ hash_max = rmap_item_hash_max(rmap_item, hash);
++
++ while (*new) {
++ int cmp;
++
++ tree_rmap_item = rb_entry(*new, struct rmap_item,
++ node);
++
++ cmp = hash_cmp(hash_max, tree_rmap_item->hash_max);
++ parent = *new;
++ if (cmp < 0)
++ new = &parent->rb_left;
++ else if (cmp > 0)
++ new = &parent->rb_right;
++ else
++ goto get_page_out;
++ }
++ } else {
++ /* alloc a new tree_node */
++ tree_node = alloc_tree_node(&unstable_tree_node_list);
++ if (!tree_node)
++ return NULL;
++
++ tree_node->hash = hash;
++ rb_link_node(&tree_node->node, parent, new);
++ rb_insert_color(&tree_node->node, &root_unstable_tree);
++ parent = NULL;
++ new = &tree_node->sub_root.rb_node;
++ }
++
++ /* not found, even in the sub-tree */
++ rmap_item->tree_node = tree_node;
++ rmap_item->address |= UNSTABLE_FLAG;
++ rmap_item->hash_round = uksm_hash_round;
++ rb_link_node(&rmap_item->node, parent, new);
++ rb_insert_color(&rmap_item->node, &tree_node->sub_root);
++
++ uksm_pages_unshared++;
++ return NULL;
++
++get_page_out:
++ if (tree_rmap_item->page == rmap_item->page)
++ return NULL;
++
++ if (get_tree_rmap_item_page(tree_rmap_item))
++ return NULL;
++
++ return tree_rmap_item;
++}
++
++static void hold_anon_vma(struct rmap_item *rmap_item,
++ struct anon_vma *anon_vma)
++{
++ rmap_item->anon_vma = anon_vma;
++ get_anon_vma(anon_vma);
++}
++
++
++/**
++ * stable_tree_append() - append a rmap_item to a stable node. Deduplication
++ * ratio statistics are collected in this function.
++ *
++ */
++static void stable_tree_append(struct rmap_item *rmap_item,
++ struct stable_node *stable_node, int logdedup)
++{
++ struct node_vma *node_vma = NULL, *new_node_vma, *node_vma_cont = NULL;
++ unsigned long key = (unsigned long)rmap_item->slot;
++ unsigned long factor = rmap_item->slot->rung->step;
++
++ BUG_ON(!stable_node);
++ rmap_item->address |= STABLE_FLAG;
++
++ if (hlist_empty(&stable_node->hlist)) {
++ uksm_pages_shared++;
++ goto node_vma_new;
++ } else {
++ uksm_pages_sharing++;
++ }
++
++ hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) {
++ if (node_vma->key >= key)
++ break;
++
++ if (logdedup) {
++ node_vma->slot->pages_bemerged += factor;
++ if (list_empty(&node_vma->slot->dedup_list))
++ list_add(&node_vma->slot->dedup_list,
++ &vma_slot_dedup);
++ }
++ }
++
++ if (node_vma) {
++ if (node_vma->key == key) {
++ node_vma_cont = hlist_entry_safe(node_vma->hlist.next, struct node_vma, hlist);
++ goto node_vma_ok;
++ } else if (node_vma->key > key) {
++ node_vma_cont = node_vma;
++ }
++ }
++
++node_vma_new:
++ /* no same vma already in node, alloc a new node_vma */
++ new_node_vma = alloc_node_vma();
++ BUG_ON(!new_node_vma);
++ new_node_vma->head = stable_node;
++ new_node_vma->slot = rmap_item->slot;
++
++ if (!node_vma) {
++ hlist_add_head(&new_node_vma->hlist, &stable_node->hlist);
++ } else if (node_vma->key != key) {
++ if (node_vma->key < key)
++ hlist_add_behind(&new_node_vma->hlist, &node_vma->hlist);
++ else {
++ hlist_add_before(&new_node_vma->hlist,
++ &node_vma->hlist);
++ }
++
++ }
++ node_vma = new_node_vma;
++
++node_vma_ok: /* ok, ready to add to the list */
++ rmap_item->head = node_vma;
++ hlist_add_head(&rmap_item->hlist, &node_vma->rmap_hlist);
++ hold_anon_vma(rmap_item, rmap_item->slot->vma->anon_vma);
++ if (logdedup) {
++ rmap_item->slot->pages_merged++;
++ if (node_vma_cont) {
++ node_vma = node_vma_cont;
++ hlist_for_each_entry_continue(node_vma, hlist) {
++ node_vma->slot->pages_bemerged += factor;
++ if (list_empty(&node_vma->slot->dedup_list))
++ list_add(&node_vma->slot->dedup_list,
++ &vma_slot_dedup);
++ }
++ }
++ }
++}
++
++/*
++ * We use break_ksm to break COW on a ksm page: it's a stripped down
++ *
++ * if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1)
++ * put_page(page);
++ *
++ * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
++ * in case the application has unmapped and remapped mm,addr meanwhile.
++ * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP
++ * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
++ */
++static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
++{
++ struct page *page;
++ int ret = 0;
++
++ do {
++ cond_resched();
++ page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
++ if (IS_ERR_OR_NULL(page))
++ break;
++ if (PageKsm(page)) {
++ ret = handle_mm_fault(vma, addr,
++ FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE,
++ NULL);
++ } else
++ ret = VM_FAULT_WRITE;
++ put_page(page);
++ } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
++ /*
++ * We must loop because handle_mm_fault() may back out if there's
++ * any difficulty e.g. if pte accessed bit gets updated concurrently.
++ *
++ * VM_FAULT_WRITE is what we have been hoping for: it indicates that
++ * COW has been broken, even if the vma does not permit VM_WRITE;
++ * but note that a concurrent fault might break PageKsm for us.
++ *
++ * VM_FAULT_SIGBUS could occur if we race with truncation of the
++ * backing file, which also invalidates anonymous pages: that's
++ * okay, that truncation will have unmapped the PageKsm for us.
++ *
++ * VM_FAULT_OOM: at the time of writing (late July 2009), setting
++ * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
++ * current task has TIF_MEMDIE set, and will be OOM killed on return
++ * to user; and ksmd, having no mm, would never be chosen for that.
++ *
++ * But if the mm is in a limited mem_cgroup, then the fault may fail
++ * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
++ * even ksmd can fail in this way - though it's usually breaking ksm
++ * just to undo a merge it made a moment before, so unlikely to oom.
++ *
++ * That's a pity: we might therefore have more kernel pages allocated
++ * than we're counting as nodes in the stable tree; but uksm_do_scan
++ * will retry to break_cow on each pass, so should recover the page
++ * in due course. The important thing is to not let VM_MERGEABLE
++ * be cleared while any such pages might remain in the area.
++ */
++ return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
++}
++
++static void break_cow(struct rmap_item *rmap_item)
++{
++ struct vm_area_struct *vma = rmap_item->slot->vma;
++ struct mm_struct *mm = vma->vm_mm;
++ unsigned long addr = get_rmap_addr(rmap_item);
++
++ if (uksm_test_exit(mm))
++ goto out;
++
++ break_ksm(vma, addr);
++out:
++ return;
++}
++
++/*
++ * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather
++ * than check every pte of a given vma, the locking doesn't quite work for
++ * that - an rmap_item is assigned to the stable tree after inserting ksm
++ * page and upping mmap_sem. Nor does it fit with the way we skip dup'ing
++ * rmap_items from parent to child at fork time (so as not to waste time
++ * if exit comes before the next scan reaches it).
++ *
++ * Similarly, although we'd like to remove rmap_items (so updating counts
++ * and freeing memory) when unmerging an area, it's easier to leave that
++ * to the next pass of ksmd - consider, for example, how ksmd might be
++ * in cmp_and_merge_page on one of the rmap_items we would be removing.
++ */
++inline int unmerge_uksm_pages(struct vm_area_struct *vma,
++ unsigned long start, unsigned long end)
++{
++ unsigned long addr;
++ int err = 0;
++
++ for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
++ if (uksm_test_exit(vma->vm_mm))
++ break;
++ if (signal_pending(current))
++ err = -ERESTARTSYS;
++ else
++ err = break_ksm(vma, addr);
++ }
++ return err;
++}
++
++static inline void inc_uksm_pages_scanned(void)
++{
++ u64 delta;
++
++
++ if (uksm_pages_scanned == U64_MAX) {
++ encode_benefit();
++
++ delta = uksm_pages_scanned >> pages_scanned_base;
++
++ if (CAN_OVERFLOW_U64(pages_scanned_stored, delta)) {
++ pages_scanned_stored >>= 1;
++ delta >>= 1;
++ pages_scanned_base++;
++ }
++
++ pages_scanned_stored += delta;
++
++ uksm_pages_scanned = uksm_pages_scanned_last = 0;
++ }
++
++ uksm_pages_scanned++;
++}
++
++static inline int find_zero_page_hash(int strength, u32 hash)
++{
++ return (zero_hash_table[strength] == hash);
++}
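++
++/*
++ * zero_hash_table[] (initialized elsewhere in this patch) is assumed to
++ * hold the hash of the all-zero page at every strength, so a page whose
++ * hash matches zero_hash_table[hash_strength] is a candidate for merging
++ * with the zero page via cmp_and_merge_zero_page() below.
++ */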
++
++static
++int cmp_and_merge_zero_page(struct vm_area_struct *vma, struct page *page)
++{
++ struct page *zero_page = empty_uksm_zero_page;
++ struct mm_struct *mm = vma->vm_mm;
++ pte_t orig_pte = __pte(0);
++ int err = -EFAULT;
++
++ if (uksm_test_exit(mm))
++ goto out;
++
++ if (!trylock_page(page))
++ goto out;
++
++ if (!PageAnon(page))
++ goto out_unlock;
++
++ if (PageTransCompound(page)) {
++ err = split_huge_page(page);
++ if (err)
++ goto out_unlock;
++ }
++
++ if (write_protect_page(vma, page, &orig_pte, 0) == 0) {
++ if (is_page_full_zero(page))
++ err = replace_page(vma, page, zero_page, orig_pte);
++ }
++
++out_unlock:
++ unlock_page(page);
++out:
++ return err;
++}
++
++/*
++ * cmp_and_merge_page() - first see if page can be merged into the stable
++ * tree; if not, compare hash to previous and if it's the same, see if page
++ * can be inserted into the unstable tree, or merged with a page already there
++ * and both transferred to the stable tree.
++ *
++ * @page: the page that we are searching identical page to.
++ * @rmap_item: the reverse mapping into the virtual address of this page
++ */
++static void cmp_and_merge_page(struct rmap_item *rmap_item, u32 hash)
++{
++ struct rmap_item *tree_rmap_item;
++ struct page *page;
++ struct page *kpage = NULL;
++ u32 hash_max;
++ int err;
++ unsigned int success1, success2;
++ struct stable_node *snode;
++ int cmp;
++ struct rb_node *parent = NULL, **new;
++
++ remove_rmap_item_from_tree(rmap_item);
++ page = rmap_item->page;
++
++ /* We first start with searching the page inside the stable tree */
++ kpage = stable_tree_search(rmap_item, hash);
++ if (kpage) {
++ err = try_to_merge_with_uksm_page(rmap_item, kpage,
++ hash);
++ if (!err) {
++ /*
++ * The page was successfully merged, add
++ * its rmap_item to the stable tree.
++ * page lock is needed because it's
++ * racing with try_to_unmap_ksm(), etc.
++ */
++ lock_page(kpage);
++ snode = page_stable_node(kpage);
++ stable_tree_append(rmap_item, snode, 1);
++ unlock_page(kpage);
++ put_page(kpage);
++ return; /* success */
++ }
++ put_page(kpage);
++
++ /*
++ * if it's a collision and it has been searched in the sub-rbtree
++ * (hash_max != 0), we want to abort, because if it is
++ * successfully merged in the unstable tree, the collision tends to
++ * happen again.
++ */
++ if (err == MERGE_ERR_COLLI && rmap_item->hash_max)
++ return;
++ }
++
++ tree_rmap_item =
++ unstable_tree_search_insert(rmap_item, hash);
++ if (tree_rmap_item) {
++ err = try_to_merge_two_pages(rmap_item, tree_rmap_item, hash);
++ /*
++ * As soon as we merge this page, we want to remove the
++ * rmap_item of the page we have merged with from the unstable
++ * tree, and insert it instead as new node in the stable tree.
++ */
++ if (!err) {
++ kpage = page;
++ remove_rmap_item_from_tree(tree_rmap_item);
++ lock_page(kpage);
++ snode = stable_tree_insert(&kpage, hash,
++ rmap_item, tree_rmap_item,
++ &success1, &success2);
++
++ /*
++ * Do not log dedup for tree item, it's not counted as
++ * scanned in this round.
++ */
++ if (success2)
++ stable_tree_append(tree_rmap_item, snode, 0);
++
++ /*
++ * The order of these two stable_tree_append() calls is
++ * important: we are currently scanning rmap_item.
++ */
++ if (success1)
++ stable_tree_append(rmap_item, snode, 1);
++
++ /*
++ * The original kpage may be unlocked inside
++ * stable_tree_insert() already. This page
++ * should be unlocked before doing
++ * break_cow().
++ */
++ unlock_page(kpage);
++
++ if (!success1)
++ break_cow(rmap_item);
++
++ if (!success2)
++ break_cow(tree_rmap_item);
++
++ } else if (err == MERGE_ERR_COLLI) {
++ BUG_ON(tree_rmap_item->tree_node->count > 1);
++
++ rmap_item_hash_max(tree_rmap_item,
++ tree_rmap_item->tree_node->hash);
++
++ hash_max = rmap_item_hash_max(rmap_item, hash);
++ cmp = hash_cmp(hash_max, tree_rmap_item->hash_max);
++ parent = &tree_rmap_item->node;
++ if (cmp < 0)
++ new = &parent->rb_left;
++ else if (cmp > 0)
++ new = &parent->rb_right;
++ else
++ goto put_up_out;
++
++ rmap_item->tree_node = tree_rmap_item->tree_node;
++ rmap_item->address |= UNSTABLE_FLAG;
++ rmap_item->hash_round = uksm_hash_round;
++ rb_link_node(&rmap_item->node, parent, new);
++ rb_insert_color(&rmap_item->node,
++ &tree_rmap_item->tree_node->sub_root);
++ rmap_item->tree_node->count++;
++ } else {
++ /*
++ * either one of the page has changed or they collide
++ * at the max hash, we consider them as ill items.
++ */
++ remove_rmap_item_from_tree(tree_rmap_item);
++ }
++put_up_out:
++ put_page(tree_rmap_item->page);
++ mmap_read_unlock(tree_rmap_item->slot->vma->vm_mm);
++ }
++}
++
++
++
++
++static inline unsigned long get_pool_index(struct vma_slot *slot,
++ unsigned long index)
++{
++ unsigned long pool_index;
++
++ pool_index = (sizeof(struct rmap_list_entry *) * index) >> PAGE_SHIFT;
++ if (pool_index >= slot->pool_size)
++ BUG();
++ return pool_index;
++}
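++
++/*
++ * Example (assuming 4 KiB pages and 8-byte pointer-sized entries):
++ * entry index 600 lives in pool page (8 * 600) >> PAGE_SHIFT = 1, the
++ * second pool page, at byte offset 8 * 600 - 4096 = 704 within it.
++ */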
++
++static inline unsigned long index_page_offset(unsigned long index)
++{
++ return offset_in_page(sizeof(struct rmap_list_entry *) * index);
++}
++
++static inline
++struct rmap_list_entry *get_rmap_list_entry(struct vma_slot *slot,
++ unsigned long index, int need_alloc)
++{
++ unsigned long pool_index;
++ struct page *page;
++ void *addr;
++
++
++ pool_index = get_pool_index(slot, index);
++ if (!slot->rmap_list_pool[pool_index]) {
++ if (!need_alloc)
++ return NULL;
++
++ page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
++ if (!page)
++ return NULL;
++
++ slot->rmap_list_pool[pool_index] = page;
++ }
++
++ addr = kmap(slot->rmap_list_pool[pool_index]);
++ addr += index_page_offset(index);
++
++ return addr;
++}
++
++static inline void put_rmap_list_entry(struct vma_slot *slot,
++ unsigned long index)
++{
++ unsigned long pool_index;
++
++ pool_index = get_pool_index(slot, index);
++ BUG_ON(!slot->rmap_list_pool[pool_index]);
++ kunmap(slot->rmap_list_pool[pool_index]);
++}
++
++static inline int entry_is_new(struct rmap_list_entry *entry)
++{
++ return !entry->item;
++}
++
++static inline unsigned long get_index_orig_addr(struct vma_slot *slot,
++ unsigned long index)
++{
++ return slot->vma->vm_start + (index << PAGE_SHIFT);
++}
++
++static inline unsigned long get_entry_address(struct rmap_list_entry *entry)
++{
++ unsigned long addr;
++
++ if (is_addr(entry->addr))
++ addr = get_clean_addr(entry->addr);
++ else if (entry->item)
++ addr = get_rmap_addr(entry->item);
++ else
++ BUG();
++
++ return addr;
++}
++
++static inline struct rmap_item *get_entry_item(struct rmap_list_entry *entry)
++{
++ if (is_addr(entry->addr))
++ return NULL;
++
++ return entry->item;
++}
++
++static inline void inc_rmap_list_pool_count(struct vma_slot *slot,
++ unsigned long index)
++{
++ unsigned long pool_index;
++
++ pool_index = get_pool_index(slot, index);
++ BUG_ON(!slot->rmap_list_pool[pool_index]);
++ slot->pool_counts[pool_index]++;
++}
++
++static inline void dec_rmap_list_pool_count(struct vma_slot *slot,
++ unsigned long index)
++{
++ unsigned long pool_index;
++
++ pool_index = get_pool_index(slot, index);
++ BUG_ON(!slot->rmap_list_pool[pool_index]);
++ BUG_ON(!slot->pool_counts[pool_index]);
++ slot->pool_counts[pool_index]--;
++}
++
++static inline int entry_has_rmap(struct rmap_list_entry *entry)
++{
++ return !is_addr(entry->addr) && entry->item;
++}
++
++static inline void swap_entries(struct rmap_list_entry *entry1,
++ unsigned long index1,
++ struct rmap_list_entry *entry2,
++ unsigned long index2)
++{
++ struct rmap_list_entry tmp;
++
++ /* swapping two new entries is meaningless */
++ BUG_ON(entry_is_new(entry1) && entry_is_new(entry2));
++
++ tmp = *entry1;
++ *entry1 = *entry2;
++ *entry2 = tmp;
++
++ if (entry_has_rmap(entry1))
++ entry1->item->entry_index = index1;
++
++ if (entry_has_rmap(entry2))
++ entry2->item->entry_index = index2;
++
++ if (entry_has_rmap(entry1) && !entry_has_rmap(entry2)) {
++ inc_rmap_list_pool_count(entry1->item->slot, index1);
++ dec_rmap_list_pool_count(entry1->item->slot, index2);
++ } else if (!entry_has_rmap(entry1) && entry_has_rmap(entry2)) {
++ inc_rmap_list_pool_count(entry2->item->slot, index2);
++ dec_rmap_list_pool_count(entry2->item->slot, index1);
++ }
++}
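++
++/*
++ * Bookkeeping note for the swap above: pool_counts[] tracks how many
++ * live rmap_items each pool page holds, so when a swap moves the only
++ * item out of a pool page its count drops to zero and
++ * try_free_last_pool() can release that page later.
++ */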
++
++static inline void free_entry_item(struct rmap_list_entry *entry)
++{
++ unsigned long index;
++ struct rmap_item *item;
++
++ if (!is_addr(entry->addr)) {
++ BUG_ON(!entry->item);
++ item = entry->item;
++ entry->addr = get_rmap_addr(item);
++ set_is_addr(entry->addr);
++ index = item->entry_index;
++ remove_rmap_item_from_tree(item);
++ dec_rmap_list_pool_count(item->slot, index);
++ free_rmap_item(item);
++ }
++}
++
++static inline int pool_entry_boundary(unsigned long index)
++{
++ unsigned long linear_addr;
++
++ linear_addr = sizeof(struct rmap_list_entry *) * index;
++ return index && !offset_in_page(linear_addr);
++}
++
++static inline void try_free_last_pool(struct vma_slot *slot,
++ unsigned long index)
++{
++ unsigned long pool_index;
++
++ pool_index = get_pool_index(slot, index);
++ if (slot->rmap_list_pool[pool_index] &&
++ !slot->pool_counts[pool_index]) {
++ __free_page(slot->rmap_list_pool[pool_index]);
++ slot->rmap_list_pool[pool_index] = NULL;
++ slot->flags |= UKSM_SLOT_NEED_SORT;
++ }
++
++}
++
++static inline unsigned long vma_item_index(struct vm_area_struct *vma,
++ struct rmap_item *item)
++{
++ return (get_rmap_addr(item) - vma->vm_start) >> PAGE_SHIFT;
++}
++
++static int within_same_pool(struct vma_slot *slot,
++ unsigned long i, unsigned long j)
++{
++ unsigned long pool_i, pool_j;
++
++ pool_i = get_pool_index(slot, i);
++ pool_j = get_pool_index(slot, j);
++
++ return (pool_i == pool_j);
++}
++
++static void sort_rmap_entry_list(struct vma_slot *slot)
++{
++ unsigned long i, j;
++ struct rmap_list_entry *entry, *swap_entry;
++
++ entry = get_rmap_list_entry(slot, 0, 0);
++ for (i = 0; i < slot->pages; ) {
++
++ if (!entry)
++ goto skip_whole_pool;
++
++ if (entry_is_new(entry))
++ goto next_entry;
++
++ if (is_addr(entry->addr)) {
++ entry->addr = 0;
++ goto next_entry;
++ }
++
++ j = vma_item_index(slot->vma, entry->item);
++ if (j == i)
++ goto next_entry;
++
++ if (within_same_pool(slot, i, j))
++ swap_entry = entry + j - i;
++ else
++ swap_entry = get_rmap_list_entry(slot, j, 1);
++
++ swap_entries(entry, i, swap_entry, j);
++ if (!within_same_pool(slot, i, j))
++ put_rmap_list_entry(slot, j);
++ continue;
++
++skip_whole_pool:
++ i += PAGE_SIZE / sizeof(*entry);
++ if (i < slot->pages)
++ entry = get_rmap_list_entry(slot, i, 0);
++ continue;
++
++next_entry:
++ if (i >= slot->pages - 1 ||
++ !within_same_pool(slot, i, i + 1)) {
++ put_rmap_list_entry(slot, i);
++ if (i + 1 < slot->pages)
++ entry = get_rmap_list_entry(slot, i + 1, 0);
++ } else
++ entry++;
++ i++;
++ continue;
++ }
++
++ /* free empty pool entries which contain no rmap_item */
++	/* Could be simplified to rely only on pool_counts once proven bug-free. */
++ for (i = 0; i < slot->pool_size; i++) {
++ unsigned char has_rmap;
++ void *addr;
++
++ if (!slot->rmap_list_pool[i])
++ continue;
++
++ has_rmap = 0;
++ addr = kmap(slot->rmap_list_pool[i]);
++ BUG_ON(!addr);
++ for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) {
++ entry = (struct rmap_list_entry *)addr + j;
++ if (is_addr(entry->addr))
++ continue;
++ if (!entry->item)
++ continue;
++ has_rmap = 1;
++ }
++ kunmap(slot->rmap_list_pool[i]);
++ if (!has_rmap) {
++ BUG_ON(slot->pool_counts[i]);
++ __free_page(slot->rmap_list_pool[i]);
++ slot->rmap_list_pool[i] = NULL;
++ }
++ }
++
++ slot->flags &= ~UKSM_SLOT_NEED_SORT;
++}
++
++/*
++ * vma_fully_scanned() - true if all pages in this slot have been scanned.
++ */
++static inline int vma_fully_scanned(struct vma_slot *slot)
++{
++ return slot->pages_scanned == slot->pages;
++}
++
++/**
++ * get_next_rmap_item() - Get the next rmap_item in a vma_slot according
++ * to its random permutation. The permutation index management code is
++ * embedded in this function.
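++ *
++ * When re-randomizing, the permutation is generated lazily, Fisher-Yates
++ * style: each step swaps the current entry with a uniformly random later
++ * one, so a full pass still visits every page exactly once.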
++ */
++static struct rmap_item *get_next_rmap_item(struct vma_slot *slot, u32 *hash)
++{
++ unsigned long rand_range, addr, swap_index, scan_index;
++ struct rmap_item *item = NULL;
++ struct rmap_list_entry *scan_entry, *swap_entry = NULL;
++ struct page *page;
++
++ scan_index = swap_index = slot->pages_scanned % slot->pages;
++
++ if (pool_entry_boundary(scan_index))
++ try_free_last_pool(slot, scan_index - 1);
++
++ if (vma_fully_scanned(slot)) {
++ if (slot->flags & UKSM_SLOT_NEED_SORT)
++ slot->flags |= UKSM_SLOT_NEED_RERAND;
++ else
++ slot->flags &= ~UKSM_SLOT_NEED_RERAND;
++ if (slot->flags & UKSM_SLOT_NEED_SORT)
++ sort_rmap_entry_list(slot);
++ }
++
++ scan_entry = get_rmap_list_entry(slot, scan_index, 1);
++ if (!scan_entry)
++ return NULL;
++
++ if (entry_is_new(scan_entry)) {
++ scan_entry->addr = get_index_orig_addr(slot, scan_index);
++ set_is_addr(scan_entry->addr);
++ }
++
++ if (slot->flags & UKSM_SLOT_NEED_RERAND) {
++ rand_range = slot->pages - scan_index;
++ BUG_ON(!rand_range);
++ swap_index = scan_index + (prandom_u32() % rand_range);
++ }
++
++ if (swap_index != scan_index) {
++ swap_entry = get_rmap_list_entry(slot, swap_index, 1);
++
++ if (!swap_entry)
++ return NULL;
++
++ if (entry_is_new(swap_entry)) {
++ swap_entry->addr = get_index_orig_addr(slot,
++ swap_index);
++ set_is_addr(swap_entry->addr);
++ }
++ swap_entries(scan_entry, scan_index, swap_entry, swap_index);
++ }
++
++ addr = get_entry_address(scan_entry);
++ item = get_entry_item(scan_entry);
++ BUG_ON(addr > slot->vma->vm_end || addr < slot->vma->vm_start);
++
++ page = follow_page(slot->vma, addr, FOLL_GET);
++ if (IS_ERR_OR_NULL(page))
++ goto nopage;
++
++ if (!PageAnon(page))
++ goto putpage;
++
++	/* check whether this is the zero page pfn or the uksm zero page */
++ if ((page_to_pfn(page) == zero_pfn)
++ || (page_to_pfn(page) == uksm_zero_pfn))
++ goto putpage;
++
++ flush_anon_page(slot->vma, page, addr);
++ flush_dcache_page(page);
++
++ *hash = page_hash(page, hash_strength, 1);
++ inc_uksm_pages_scanned();
++	/* if the page content is all zero, remap it to the zero page */
++ if (find_zero_page_hash(hash_strength, *hash)) {
++ if (!cmp_and_merge_zero_page(slot->vma, page)) {
++ slot->pages_merged++;
++
++ /* For full-zero pages, no need to create rmap item */
++ goto putpage;
++ } else {
++ inc_rshash_neg(memcmp_cost / 2);
++ }
++ }
++
++ if (!item) {
++ item = alloc_rmap_item();
++ if (item) {
++ /* It has already been zeroed */
++ item->slot = slot;
++ item->address = addr;
++ item->entry_index = scan_index;
++ scan_entry->item = item;
++ inc_rmap_list_pool_count(slot, scan_index);
++ } else
++ goto putpage;
++ }
++
++ BUG_ON(item->slot != slot);
++ /* the page may have changed */
++ item->page = page;
++ put_rmap_list_entry(slot, scan_index);
++ if (swap_entry)
++ put_rmap_list_entry(slot, swap_index);
++ return item;
++
++putpage:
++ put_page(page);
++ page = NULL;
++nopage:
++ /* no page, store addr back and free rmap_item if possible */
++ free_entry_item(scan_entry);
++ put_rmap_list_entry(slot, scan_index);
++ if (swap_entry)
++ put_rmap_list_entry(slot, swap_index);
++ return NULL;
++}
++
++static inline int in_stable_tree(struct rmap_item *rmap_item)
++{
++ return rmap_item->address & STABLE_FLAG;
++}
++
++/**
++ * scan_vma_one_page() - scan the next page in a vma_slot. Called with
++ * mmap_sem locked.
++ */
++static noinline void scan_vma_one_page(struct vma_slot *slot)
++{
++ u32 hash;
++ struct mm_struct *mm;
++ struct rmap_item *rmap_item = NULL;
++ struct vm_area_struct *vma = slot->vma;
++
++ mm = vma->vm_mm;
++ BUG_ON(!mm);
++ BUG_ON(!slot);
++
++ rmap_item = get_next_rmap_item(slot, &hash);
++ if (!rmap_item)
++ goto out1;
++
++ if (PageKsm(rmap_item->page) && in_stable_tree(rmap_item))
++ goto out2;
++
++ cmp_and_merge_page(rmap_item, hash);
++out2:
++ put_page(rmap_item->page);
++out1:
++ slot->pages_scanned++;
++ slot->this_sampled++;
++ if (slot->fully_scanned_round != fully_scanned_round)
++ scanned_virtual_pages++;
++
++ if (vma_fully_scanned(slot))
++ slot->fully_scanned_round = fully_scanned_round;
++}
++
++static inline unsigned long rung_get_pages(struct scan_rung *rung)
++{
++ struct slot_tree_node *node;
++
++ if (!rung->vma_root.rnode)
++ return 0;
++
++ node = container_of(rung->vma_root.rnode, struct slot_tree_node, snode);
++
++ return node->size;
++}
++
++#define RUNG_SAMPLED_MIN 3
++
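++/*
++ * Choose a rung's sampling step so that one pass completes within its
++ * cover_msecs interval: the CPU time available in that window is
++ * cover_msecs * ratio / TIME_RATIO_SCALE; dividing it by the per-page
++ * cost gives the number of pages that can be sampled, and
++ * step = pages / sampled.
++ */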
++static inline
++void uksm_calc_rung_step(struct scan_rung *rung,
++ unsigned long page_time, unsigned long ratio)
++{
++ unsigned long sampled, pages;
++
++	/* cover_msecs == 0 means the rung is fully scanned, one page at a time */
++ if (!rung->cover_msecs) {
++ rung->step = 1;
++ return;
++ }
++
++ sampled = rung->cover_msecs * (NSEC_PER_MSEC / TIME_RATIO_SCALE)
++ * ratio / page_time;
++
++ /*
++	 * Before we finish a scan round and its expensive per-round jobs,
++	 * we need a chance to estimate the per-page time, so the sample
++	 * count cannot be too small.
++ */
++ if (sampled < RUNG_SAMPLED_MIN)
++ sampled = RUNG_SAMPLED_MIN;
++
++ pages = rung_get_pages(rung);
++ if (likely(pages > sampled))
++ rung->step = pages / sampled;
++ else
++ rung->step = 1;
++}
++
++static inline int step_need_recalc(struct scan_rung *rung)
++{
++ unsigned long pages, stepmax;
++
++ pages = rung_get_pages(rung);
++ stepmax = pages / RUNG_SAMPLED_MIN;
++
++ return pages && (rung->step > pages ||
++ (stepmax && rung->step > stepmax));
++}
++
++static inline
++void reset_current_scan(struct scan_rung *rung, int finished, int step_recalc)
++{
++ struct vma_slot *slot;
++
++ if (finished)
++ rung->flags |= UKSM_RUNG_ROUND_FINISHED;
++
++ if (step_recalc || step_need_recalc(rung)) {
++ uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio);
++ BUG_ON(step_need_recalc(rung));
++ }
++
++ slot_iter_index = prandom_u32() % rung->step;
++ BUG_ON(!rung->vma_root.rnode);
++ slot = sradix_tree_next(&rung->vma_root, NULL, 0, slot_iter);
++ BUG_ON(!slot);
++
++ rung->current_scan = slot;
++ rung->current_offset = slot_iter_index;
++}
++
++static inline struct sradix_tree_root *slot_get_root(struct vma_slot *slot)
++{
++ return &slot->rung->vma_root;
++}
++
++/*
++ * Return 1 if the scan position was reset to the start of the rung.
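++ *
++ * The step remainder is carried across the slot boundary so the
++ * sampling stride stays uniform from slot to slot.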
++ */
++static int advance_current_scan(struct scan_rung *rung)
++{
++ unsigned short n;
++ struct vma_slot *slot, *next = NULL;
++
++ BUG_ON(!rung->vma_root.num);
++
++ slot = rung->current_scan;
++ n = (slot->pages - rung->current_offset) % rung->step;
++ slot_iter_index = rung->step - n;
++ next = sradix_tree_next(&rung->vma_root, slot->snode,
++ slot->sindex, slot_iter);
++
++ if (next) {
++ rung->current_offset = slot_iter_index;
++ rung->current_scan = next;
++ return 0;
++ } else {
++ reset_current_scan(rung, 1, 0);
++ return 1;
++ }
++}
++
++static inline void rung_rm_slot(struct vma_slot *slot)
++{
++ struct scan_rung *rung = slot->rung;
++ struct sradix_tree_root *root;
++
++ if (rung->current_scan == slot)
++ advance_current_scan(rung);
++
++ root = slot_get_root(slot);
++ sradix_tree_delete_from_leaf(root, slot->snode, slot->sindex);
++ slot->snode = NULL;
++ if (step_need_recalc(rung)) {
++ uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio);
++ BUG_ON(step_need_recalc(rung));
++ }
++
++	/* In case advance_current_scan() looped back to this slot again */
++ if (rung->vma_root.num && rung->current_scan == slot)
++ reset_current_scan(slot->rung, 1, 0);
++}
++
++static inline void rung_add_new_slots(struct scan_rung *rung,
++ struct vma_slot **slots, unsigned long num)
++{
++ int err;
++ struct vma_slot *slot;
++ unsigned long i;
++ struct sradix_tree_root *root = &rung->vma_root;
++
++ err = sradix_tree_enter(root, (void **)slots, num);
++ BUG_ON(err);
++
++ for (i = 0; i < num; i++) {
++ slot = slots[i];
++ slot->rung = rung;
++ BUG_ON(vma_fully_scanned(slot));
++ }
++
++ if (rung->vma_root.num == num)
++ reset_current_scan(rung, 0, 1);
++}
++
++static inline int rung_add_one_slot(struct scan_rung *rung,
++ struct vma_slot *slot)
++{
++ int err;
++
++ err = sradix_tree_enter(&rung->vma_root, (void **)&slot, 1);
++ if (err)
++ return err;
++
++ slot->rung = rung;
++ if (rung->vma_root.num == 1)
++ reset_current_scan(rung, 0, 1);
++
++ return 0;
++}
++
++/*
++ * Return true if the slot is deleted from its rung.
++ */
++static inline int vma_rung_enter(struct vma_slot *slot, struct scan_rung *rung)
++{
++ struct scan_rung *old_rung = slot->rung;
++ int err;
++
++ if (old_rung == rung)
++ return 0;
++
++ rung_rm_slot(slot);
++ err = rung_add_one_slot(rung, slot);
++ if (err) {
++ err = rung_add_one_slot(old_rung, slot);
++ WARN_ON(err); /* OOPS, badly OOM, we lost this slot */
++ }
++
++ return 1;
++}
++
++static inline int vma_rung_up(struct vma_slot *slot)
++{
++ struct scan_rung *rung;
++
++ rung = slot->rung;
++ if (slot->rung != &uksm_scan_ladder[SCAN_LADDER_SIZE-1])
++ rung++;
++
++ return vma_rung_enter(slot, rung);
++}
++
++static inline int vma_rung_down(struct vma_slot *slot)
++{
++ struct scan_rung *rung;
++
++ rung = slot->rung;
++ if (slot->rung != &uksm_scan_ladder[0])
++ rung--;
++
++ return vma_rung_enter(slot, rung);
++}
++
++/**
++ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot.
++ */
++static unsigned long cal_dedup_ratio(struct vma_slot *slot)
++{
++ unsigned long ret;
++ unsigned long pages;
++
++ pages = slot->this_sampled;
++ if (!pages)
++ return 0;
++
++ BUG_ON(slot->pages_scanned == slot->last_scanned);
++
++ ret = slot->pages_merged;
++
++ /* Thrashing area filtering */
++ if (ret && uksm_thrash_threshold) {
++ if (slot->pages_cowed * 100 / slot->pages_merged
++ > uksm_thrash_threshold) {
++ ret = 0;
++ } else {
++ ret = slot->pages_merged - slot->pages_cowed;
++ }
++ }
++
++ return ret * 100 / pages;
++}
++
++/**
++ * cal_dedup_ratio_old() - Calculate the dedup ratio over all slot pages.
++ */
++static unsigned long cal_dedup_ratio_old(struct vma_slot *slot)
++{
++ unsigned long ret;
++ unsigned long pages;
++
++ pages = slot->pages;
++ if (!pages)
++ return 0;
++
++ ret = slot->pages_bemerged;
++
++ /* Thrashing area filtering */
++ if (ret && uksm_thrash_threshold) {
++ if (slot->pages_cowed * 100 / slot->pages_bemerged
++ > uksm_thrash_threshold) {
++ ret = 0;
++ } else {
++ ret = slot->pages_bemerged - slot->pages_cowed;
++ }
++ }
++
++ return ret * 100 / pages;
++}
++
++/**
++ * stable_node_reinsert() - When the hash_strength has been adjusted, the
++ * stable tree needs to be restructured; this is the function that
++ * re-inserts a stable node.
++ */
++static inline void stable_node_reinsert(struct stable_node *new_node,
++ struct page *page,
++ struct rb_root *root_treep,
++ struct list_head *tree_node_listp,
++ u32 hash)
++{
++ struct rb_node **new = &root_treep->rb_node;
++ struct rb_node *parent = NULL;
++ struct stable_node *stable_node;
++ struct tree_node *tree_node;
++ struct page *tree_page;
++ int cmp;
++
++ while (*new) {
++ int cmp;
++
++ tree_node = rb_entry(*new, struct tree_node, node);
++
++ cmp = hash_cmp(hash, tree_node->hash);
++
++ if (cmp < 0) {
++ parent = *new;
++ new = &parent->rb_left;
++ } else if (cmp > 0) {
++ parent = *new;
++ new = &parent->rb_right;
++ } else
++ break;
++ }
++
++ if (*new) {
++		/* found a stable tree node with the same first-level hash value */
++ stable_node_hash_max(new_node, page, hash);
++ if (tree_node->count == 1) {
++ stable_node = rb_entry(tree_node->sub_root.rb_node,
++ struct stable_node, node);
++ tree_page = get_uksm_page(stable_node, 1, 0);
++ if (tree_page) {
++ stable_node_hash_max(stable_node,
++ tree_page, hash);
++ put_page(tree_page);
++
++ /* prepare for stable node insertion */
++
++ cmp = hash_cmp(new_node->hash_max,
++ stable_node->hash_max);
++ parent = &stable_node->node;
++ if (cmp < 0)
++ new = &parent->rb_left;
++ else if (cmp > 0)
++ new = &parent->rb_right;
++ else
++ goto failed;
++
++ goto add_node;
++ } else {
++				/* the only stable_node was deleted, but the
++				 * tree node was not.
++ */
++ goto tree_node_reuse;
++ }
++ }
++
++ /* well, search the collision subtree */
++ new = &tree_node->sub_root.rb_node;
++ parent = NULL;
++ BUG_ON(!*new);
++ while (*new) {
++ int cmp;
++
++ stable_node = rb_entry(*new, struct stable_node, node);
++
++ cmp = hash_cmp(new_node->hash_max,
++ stable_node->hash_max);
++
++ if (cmp < 0) {
++ parent = *new;
++ new = &parent->rb_left;
++ } else if (cmp > 0) {
++ parent = *new;
++ new = &parent->rb_right;
++ } else {
++ /* oh, no, still a collision */
++ goto failed;
++ }
++ }
++
++ goto add_node;
++ }
++
++ /* no tree node found */
++ tree_node = alloc_tree_node(tree_node_listp);
++ if (!tree_node) {
++ pr_err("UKSM: memory allocation error!\n");
++ goto failed;
++ } else {
++ tree_node->hash = hash;
++ rb_link_node(&tree_node->node, parent, new);
++ rb_insert_color(&tree_node->node, root_treep);
++
++tree_node_reuse:
++ /* prepare for stable node insertion */
++ parent = NULL;
++ new = &tree_node->sub_root.rb_node;
++ }
++
++add_node:
++ rb_link_node(&new_node->node, parent, new);
++ rb_insert_color(&new_node->node, &tree_node->sub_root);
++ new_node->tree_node = tree_node;
++ tree_node->count++;
++ return;
++
++failed:
++ /* This can only happen when two nodes have collided
++ * in two levels.
++ */
++ new_node->tree_node = NULL;
++ return;
++}
++
++static inline void free_all_tree_nodes(struct list_head *list)
++{
++ struct tree_node *node, *tmp;
++
++ list_for_each_entry_safe(node, tmp, list, all_list) {
++ free_tree_node(node);
++ }
++}
++
++/**
++ * stable_tree_delta_hash() - Delta hash the stable tree from previous hash
++ * strength to the current hash_strength. It re-structures the whole tree.
++ */
++static inline void stable_tree_delta_hash(u32 prev_hash_strength)
++{
++ struct stable_node *node, *tmp;
++ struct rb_root *root_new_treep;
++ struct list_head *new_tree_node_listp;
++
++ stable_tree_index = (stable_tree_index + 1) % 2;
++ root_new_treep = &root_stable_tree[stable_tree_index];
++ new_tree_node_listp = &stable_tree_node_list[stable_tree_index];
++ *root_new_treep = RB_ROOT;
++ BUG_ON(!list_empty(new_tree_node_listp));
++
++ /*
++ * we need to be safe, the node could be removed by get_uksm_page()
++ */
++ list_for_each_entry_safe(node, tmp, &stable_node_list, all_list) {
++ void *addr;
++ struct page *node_page;
++ u32 hash;
++
++ /*
++ * We are completely re-structuring the stable nodes to a new
++ * stable tree. We don't want to touch the old tree unlinks and
++ * old tree_nodes. The old tree_nodes will be freed at once.
++ */
++ node_page = get_uksm_page(node, 0, 0);
++ if (!node_page)
++ continue;
++
++ if (node->tree_node) {
++ hash = node->tree_node->hash;
++
++ addr = kmap_atomic(node_page);
++
++ hash = delta_hash(addr, prev_hash_strength,
++ hash_strength, hash);
++ kunmap_atomic(addr);
++ } else {
++ /*
++			 * It was not inserted into the rbtree due to a
++			 * collision in the last scan round.
++ */
++ hash = page_hash(node_page, hash_strength, 0);
++ }
++
++ stable_node_reinsert(node, node_page, root_new_treep,
++ new_tree_node_listp, hash);
++ put_page(node_page);
++ }
++
++ root_stable_treep = root_new_treep;
++ free_all_tree_nodes(stable_tree_node_listp);
++ BUG_ON(!list_empty(stable_tree_node_listp));
++ stable_tree_node_listp = new_tree_node_listp;
++}
++
++static inline void inc_hash_strength(unsigned long delta)
++{
++ hash_strength += 1 << delta;
++ if (hash_strength > HASH_STRENGTH_MAX)
++ hash_strength = HASH_STRENGTH_MAX;
++}
++
++static inline void dec_hash_strength(unsigned long delta)
++{
++ unsigned long change = 1 << delta;
++
++ if (hash_strength <= change + 1)
++ hash_strength = 1;
++ else
++ hash_strength -= change;
++}
++
++static inline void inc_hash_strength_delta(void)
++{
++ hash_strength_delta++;
++ if (hash_strength_delta > HASH_STRENGTH_DELTA_MAX)
++ hash_strength_delta = HASH_STRENGTH_DELTA_MAX;
++}
++
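++/*
++ * Negative/positive benefit ratio in percent: neg * 100 / pos, capped
++ * at 100 when neg exceeds pos; when neg * 100 would overflow u64, pos
++ * is divided by 100 instead.
++ */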
++static inline unsigned long get_current_neg_ratio(void)
++{
++ u64 pos = benefit.pos;
++ u64 neg = benefit.neg;
++
++ if (!neg)
++ return 0;
++
++ if (!pos || neg > pos)
++ return 100;
++
++ if (neg > div64_u64(U64_MAX, 100))
++ pos = div64_u64(pos, 100);
++ else
++ neg *= 100;
++
++ return div64_u64(neg, pos);
++}
++
++static inline unsigned long get_current_benefit(void)
++{
++ u64 pos = benefit.pos;
++ u64 neg = benefit.neg;
++ u64 scanned = benefit.scanned;
++
++ if (neg > pos)
++ return 0;
++
++ return div64_u64((pos - neg), scanned);
++}
++
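++/*
++ * Decide which way the hash strength should move: GO_UP when the
++ * negative-benefit ratio is high, GO_DOWN once the ratio has stayed at
++ * zero for several evaluations, OBSCURE (re-probe) periodically or
++ * when the benefit has drifted more than 50% from the stable point for
++ * several evaluations, otherwise STILL.
++ */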
++static inline int judge_rshash_direction(void)
++{
++ u64 current_neg_ratio, stable_benefit;
++ u64 current_benefit, delta = 0;
++ int ret = STILL;
++
++ /*
++	 * Probe a value shortly after boot, and again whenever the
++	 * system has been still for a long time.
++ */
++ if ((fully_scanned_round & 0xFFULL) == 10) {
++ ret = OBSCURE;
++ goto out;
++ }
++
++ current_neg_ratio = get_current_neg_ratio();
++
++ if (current_neg_ratio == 0) {
++ rshash_neg_cont_zero++;
++ if (rshash_neg_cont_zero > 2)
++ return GO_DOWN;
++ else
++ return STILL;
++ }
++ rshash_neg_cont_zero = 0;
++
++ if (current_neg_ratio > 90) {
++ ret = GO_UP;
++ goto out;
++ }
++
++ current_benefit = get_current_benefit();
++ stable_benefit = rshash_state.stable_benefit;
++
++ if (!stable_benefit) {
++ ret = OBSCURE;
++ goto out;
++ }
++
++ if (current_benefit > stable_benefit)
++ delta = current_benefit - stable_benefit;
++ else if (current_benefit < stable_benefit)
++ delta = stable_benefit - current_benefit;
++
++ delta = div64_u64(100 * delta, stable_benefit);
++
++ if (delta > 50) {
++ rshash_cont_obscure++;
++ if (rshash_cont_obscure > 2)
++ return OBSCURE;
++ else
++ return STILL;
++ }
++
++out:
++ rshash_cont_obscure = 0;
++ return ret;
++}
++
++/**
++ * rshash_adjust() - The main function to control the random sampling state
++ * machine for hash strength adapting.
++ *
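++ * The state machine hill-climbs the hash strength: RSHASH_STILL tracks
++ * a stable benefit; RSHASH_TRYDOWN and RSHASH_TRYUP probe lower and
++ * higher strengths while recording the best turn points; and
++ * RSHASH_PRE_STILL settles on the better turn point before returning
++ * to RSHASH_STILL.
++ *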
++ * return true if hash_strength has changed.
++ */
++static inline int rshash_adjust(void)
++{
++ unsigned long prev_hash_strength = hash_strength;
++
++ if (!encode_benefit())
++ return 0;
++
++ switch (rshash_state.state) {
++ case RSHASH_STILL:
++ switch (judge_rshash_direction()) {
++ case GO_UP:
++ if (rshash_state.pre_direct == GO_DOWN)
++ hash_strength_delta = 0;
++
++ inc_hash_strength(hash_strength_delta);
++ inc_hash_strength_delta();
++ rshash_state.stable_benefit = get_current_benefit();
++ rshash_state.pre_direct = GO_UP;
++ break;
++
++ case GO_DOWN:
++ if (rshash_state.pre_direct == GO_UP)
++ hash_strength_delta = 0;
++
++ dec_hash_strength(hash_strength_delta);
++ inc_hash_strength_delta();
++ rshash_state.stable_benefit = get_current_benefit();
++ rshash_state.pre_direct = GO_DOWN;
++ break;
++
++ case OBSCURE:
++ rshash_state.stable_point = hash_strength;
++ rshash_state.turn_point_down = hash_strength;
++ rshash_state.turn_point_up = hash_strength;
++ rshash_state.turn_benefit_down = get_current_benefit();
++ rshash_state.turn_benefit_up = get_current_benefit();
++ rshash_state.lookup_window_index = 0;
++ rshash_state.state = RSHASH_TRYDOWN;
++ dec_hash_strength(hash_strength_delta);
++ inc_hash_strength_delta();
++ break;
++
++ case STILL:
++ break;
++ default:
++ BUG();
++ }
++ break;
++
++ case RSHASH_TRYDOWN:
++ if (rshash_state.lookup_window_index++ % 5 == 0)
++ rshash_state.below_count = 0;
++
++ if (get_current_benefit() < rshash_state.stable_benefit)
++ rshash_state.below_count++;
++ else if (get_current_benefit() >
++ rshash_state.turn_benefit_down) {
++ rshash_state.turn_point_down = hash_strength;
++ rshash_state.turn_benefit_down = get_current_benefit();
++ }
++
++ if (rshash_state.below_count >= 3 ||
++ judge_rshash_direction() == GO_UP ||
++ hash_strength == 1) {
++ hash_strength = rshash_state.stable_point;
++ hash_strength_delta = 0;
++ inc_hash_strength(hash_strength_delta);
++ inc_hash_strength_delta();
++ rshash_state.lookup_window_index = 0;
++ rshash_state.state = RSHASH_TRYUP;
++ hash_strength_delta = 0;
++ } else {
++ dec_hash_strength(hash_strength_delta);
++ inc_hash_strength_delta();
++ }
++ break;
++
++ case RSHASH_TRYUP:
++ if (rshash_state.lookup_window_index++ % 5 == 0)
++ rshash_state.below_count = 0;
++
++ if (get_current_benefit() < rshash_state.turn_benefit_down)
++ rshash_state.below_count++;
++ else if (get_current_benefit() > rshash_state.turn_benefit_up) {
++ rshash_state.turn_point_up = hash_strength;
++ rshash_state.turn_benefit_up = get_current_benefit();
++ }
++
++ if (rshash_state.below_count >= 3 ||
++ judge_rshash_direction() == GO_DOWN ||
++ hash_strength == HASH_STRENGTH_MAX) {
++ hash_strength = rshash_state.turn_benefit_up >
++ rshash_state.turn_benefit_down ?
++ rshash_state.turn_point_up :
++ rshash_state.turn_point_down;
++
++ rshash_state.state = RSHASH_PRE_STILL;
++ } else {
++ inc_hash_strength(hash_strength_delta);
++ inc_hash_strength_delta();
++ }
++
++ break;
++
++ case RSHASH_NEW:
++ case RSHASH_PRE_STILL:
++ rshash_state.stable_benefit = get_current_benefit();
++ rshash_state.state = RSHASH_STILL;
++ hash_strength_delta = 0;
++ break;
++ default:
++ BUG();
++ }
++
++ /* rshash_neg = rshash_pos = 0; */
++ reset_benefit();
++
++ if (prev_hash_strength != hash_strength)
++ stable_tree_delta_hash(prev_hash_strength);
++
++ return prev_hash_strength != hash_strength;
++}
++
++/**
++ * round_update_ladder() - Update all the ladder adjustments whenever a
++ * scan round finishes.
++ */
++static noinline void round_update_ladder(void)
++{
++ int i;
++ unsigned long dedup;
++ struct vma_slot *slot, *tmp_slot;
++
++ for (i = 0; i < SCAN_LADDER_SIZE; i++)
++ uksm_scan_ladder[i].flags &= ~UKSM_RUNG_ROUND_FINISHED;
++
++ list_for_each_entry_safe(slot, tmp_slot, &vma_slot_dedup, dedup_list) {
++
++		/* the slot may have been removed by rung_rm_slot() when its mm exited */
++ if (slot->snode) {
++ dedup = cal_dedup_ratio_old(slot);
++ if (dedup && dedup >= uksm_abundant_threshold)
++ vma_rung_up(slot);
++ }
++
++ slot->pages_bemerged = 0;
++ slot->pages_cowed = 0;
++
++ list_del_init(&slot->dedup_list);
++ }
++}
++
++static void uksm_del_vma_slot(struct vma_slot *slot)
++{
++ int i, j;
++ struct rmap_list_entry *entry;
++
++ if (slot->snode) {
++ /*
++		 * Not necessary if the slot failed while entering the rung
++		 * (snode would still be NULL in that case).
++ */
++ rung_rm_slot(slot);
++ }
++
++ if (!list_empty(&slot->dedup_list))
++ list_del(&slot->dedup_list);
++
++ if (!slot->rmap_list_pool || !slot->pool_counts) {
++ /* In case it OOMed in uksm_vma_enter() */
++ goto out;
++ }
++
++ for (i = 0; i < slot->pool_size; i++) {
++ void *addr;
++
++ if (!slot->rmap_list_pool[i])
++ continue;
++
++ addr = kmap(slot->rmap_list_pool[i]);
++ for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) {
++ entry = (struct rmap_list_entry *)addr + j;
++ if (is_addr(entry->addr))
++ continue;
++ if (!entry->item)
++ continue;
++
++ remove_rmap_item_from_tree(entry->item);
++ free_rmap_item(entry->item);
++ slot->pool_counts[i]--;
++ }
++ BUG_ON(slot->pool_counts[i]);
++ kunmap(slot->rmap_list_pool[i]);
++ __free_page(slot->rmap_list_pool[i]);
++ }
++ kfree(slot->rmap_list_pool);
++ kfree(slot->pool_counts);
++
++out:
++ slot->rung = NULL;
++ if (slot->flags & UKSM_SLOT_IN_UKSM) {
++ BUG_ON(uksm_pages_total < slot->pages);
++ uksm_pages_total -= slot->pages;
++ }
++
++ if (slot->fully_scanned_round == fully_scanned_round)
++ scanned_virtual_pages -= slot->pages;
++ else
++ scanned_virtual_pages -= slot->pages_scanned;
++ free_vma_slot(slot);
++}
++
++#define SPIN_LOCK_PERIOD 32
++static struct vma_slot *cleanup_slots[SPIN_LOCK_PERIOD];
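++/*
++ * Drain vma_slot_del in batches of SPIN_LOCK_PERIOD slots, dropping
++ * vma_slot_list_lock between batches so the heavier per-slot teardown
++ * runs outside the spinlock.
++ */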
++static inline void cleanup_vma_slots(void)
++{
++ struct vma_slot *slot;
++ int i;
++
++ i = 0;
++ spin_lock(&vma_slot_list_lock);
++ while (!list_empty(&vma_slot_del)) {
++ slot = list_entry(vma_slot_del.next,
++ struct vma_slot, slot_list);
++ list_del(&slot->slot_list);
++ cleanup_slots[i++] = slot;
++ if (i == SPIN_LOCK_PERIOD) {
++ spin_unlock(&vma_slot_list_lock);
++ while (--i >= 0)
++ uksm_del_vma_slot(cleanup_slots[i]);
++ i = 0;
++ spin_lock(&vma_slot_list_lock);
++ }
++ }
++ spin_unlock(&vma_slot_list_lock);
++
++ while (--i >= 0)
++ uksm_del_vma_slot(cleanup_slots[i]);
++}
++
++/*
++ * Exponential moving average of the per-page scan time.
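++ *
++ * Illustration (assuming EMA_ALPHA == 20, as defined elsewhere in this
++ * patch): curr = 150 and last_ema = 100 give
++ * (20 * 150 + 80 * 100) / 100 = 110, i.e. each round moves the estimate
++ * EMA_ALPHA percent of the way toward the new sample.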
++ */
++static inline unsigned long ema(unsigned long curr, unsigned long last_ema)
++{
++ /*
++	 * For a very high burst even the ema cannot work well: a falsely
++	 * high per-page time estimate feeds back into high context-switch
++	 * and rung-update overhead, which drives the per-page time even
++	 * higher, and the estimate may never converge.
++	 *
++	 * Instead, approach such a value in a binary manner.
++ */
++ if (curr > last_ema * 10)
++ return last_ema * 2;
++
++ return (EMA_ALPHA * curr + (100 - EMA_ALPHA) * last_ema) / 100;
++}
++
++/*
++ * convert cpu ratio in 1/TIME_RATIO_SCALE configured by user to
++ * nanoseconds based on current uksm_sleep_jiffies.
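++ *
++ * If the daemon sleeps S ns per cycle and then scans for T ns, its CPU
++ * share is T / (S + T) = ratio / TIME_RATIO_SCALE, hence
++ * T = S * ratio / (TIME_RATIO_SCALE - ratio), which is the expression
++ * below.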
++ */
++static inline unsigned long cpu_ratio_to_nsec(unsigned int ratio)
++{
++ return NSEC_PER_USEC * jiffies_to_usecs(uksm_sleep_jiffies) /
++ (TIME_RATIO_SCALE - ratio) * ratio;
++}
++
++static inline unsigned long rung_real_ratio(int cpu_time_ratio)
++{
++ unsigned long ret;
++
++ BUG_ON(!cpu_time_ratio);
++
++ if (cpu_time_ratio > 0)
++ ret = cpu_time_ratio;
++ else
++ ret = (unsigned long)(-cpu_time_ratio) *
++ uksm_max_cpu_percentage / 100UL;
++
++ return ret ? ret : 1;
++}
++
++static noinline void uksm_calc_scan_pages(void)
++{
++ struct scan_rung *ladder = uksm_scan_ladder;
++ unsigned long sleep_usecs, nsecs;
++ unsigned long ratio;
++ int i;
++ unsigned long per_page;
++
++ if (uksm_ema_page_time > 100000 ||
++ (((unsigned long) uksm_eval_round & (256UL - 1)) == 0UL))
++ uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
++
++ per_page = uksm_ema_page_time;
++ BUG_ON(!per_page);
++
++ /*
++ * For every 8 eval round, we try to probe a uksm_sleep_jiffies value
++ * based on saved user input.
++ */
++ if (((unsigned long) uksm_eval_round & (8UL - 1)) == 0UL)
++ uksm_sleep_jiffies = uksm_sleep_saved;
++
++ /* We require a rung scan at least 1 page in a period. */
++ nsecs = per_page;
++ ratio = rung_real_ratio(ladder[0].cpu_ratio);
++ if (cpu_ratio_to_nsec(ratio) < nsecs) {
++ sleep_usecs = nsecs * (TIME_RATIO_SCALE - ratio) / ratio
++ / NSEC_PER_USEC;
++ uksm_sleep_jiffies = usecs_to_jiffies(sleep_usecs) + 1;
++ }
++
++ for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++ ratio = rung_real_ratio(ladder[i].cpu_ratio);
++ ladder[i].pages_to_scan = cpu_ratio_to_nsec(ratio) /
++ per_page;
++ BUG_ON(!ladder[i].pages_to_scan);
++ uksm_calc_rung_step(&ladder[i], per_page, ratio);
++ }
++}
++
++/*
++ * From the scan time of this round (in ns) to the next expected minimum
++ * sleep time (in ms); be careful of the possible overflows. @ratio is
++ * taken from rung_real_ratio().
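++ *
++ * Derivation: sleeping so that scan_time / (scan_time + sleep) equals
++ * ratio / TIME_RATIO_SCALE gives
++ * sleep = scan_time * (TIME_RATIO_SCALE - ratio) / ratio; the >> 20
++ * below cheaply approximates the ns-to-ms division by 10^6.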
++ */
++static inline
++unsigned int scan_time_to_sleep(unsigned long long scan_time, unsigned long ratio)
++{
++ scan_time >>= 20; /* to msec level now */
++ BUG_ON(scan_time > (ULONG_MAX / TIME_RATIO_SCALE));
++
++ return (unsigned int) ((unsigned long) scan_time *
++ (TIME_RATIO_SCALE - ratio) / ratio);
++}
++
++#define __round_mask(x, y) ((__typeof__(x))((y)-1))
++#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
++
++static void uksm_vma_enter(struct vma_slot **slots, unsigned long num)
++{
++ struct scan_rung *rung;
++
++ rung = &uksm_scan_ladder[0];
++ rung_add_new_slots(rung, slots, num);
++}
++
++static struct vma_slot *batch_slots[SLOT_TREE_NODE_STORE_SIZE];
++
++static void uksm_enter_all_slots(void)
++{
++ struct vma_slot *slot;
++ unsigned long index;
++ struct list_head empty_vma_list;
++ int i;
++
++ i = 0;
++ index = 0;
++ INIT_LIST_HEAD(&empty_vma_list);
++
++ spin_lock(&vma_slot_list_lock);
++ while (!list_empty(&vma_slot_new)) {
++ slot = list_entry(vma_slot_new.next,
++ struct vma_slot, slot_list);
++
++ if (!slot->vma->anon_vma) {
++ list_move(&slot->slot_list, &empty_vma_list);
++ } else if (vma_can_enter(slot->vma)) {
++ batch_slots[index++] = slot;
++ list_del_init(&slot->slot_list);
++ } else {
++ list_move(&slot->slot_list, &vma_slot_noadd);
++ }
++
++ if (++i == SPIN_LOCK_PERIOD ||
++ (index && !(index % SLOT_TREE_NODE_STORE_SIZE))) {
++ spin_unlock(&vma_slot_list_lock);
++
++ if (index && !(index % SLOT_TREE_NODE_STORE_SIZE)) {
++ uksm_vma_enter(batch_slots, index);
++ index = 0;
++ }
++ i = 0;
++ cond_resched();
++ spin_lock(&vma_slot_list_lock);
++ }
++ }
++
++ list_splice(&empty_vma_list, &vma_slot_new);
++
++ spin_unlock(&vma_slot_list_lock);
++
++ if (index)
++ uksm_vma_enter(batch_slots, index);
++}
++
++static inline int rung_round_finished(struct scan_rung *rung)
++{
++ return rung->flags & UKSM_RUNG_ROUND_FINISHED;
++}
++
++static inline void judge_slot(struct vma_slot *slot)
++{
++ struct scan_rung *rung = slot->rung;
++ unsigned long dedup;
++ int deleted;
++
++ dedup = cal_dedup_ratio(slot);
++ if (vma_fully_scanned(slot) && uksm_thrash_threshold)
++ deleted = vma_rung_enter(slot, &uksm_scan_ladder[0]);
++ else if (dedup && dedup >= uksm_abundant_threshold)
++ deleted = vma_rung_up(slot);
++ else
++ deleted = vma_rung_down(slot);
++
++ slot->pages_merged = 0;
++ slot->pages_cowed = 0;
++ slot->this_sampled = 0;
++
++ if (vma_fully_scanned(slot))
++ slot->pages_scanned = 0;
++
++ slot->last_scanned = slot->pages_scanned;
++
++	/* If it was deleted above, the rung has already been advanced. */
++ if (!deleted)
++ advance_current_scan(rung);
++}
++
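++/*
++ * A hash round is complete once the virtual pages scanned exceed a
++ * quarter of all pages tracked by uksm; that is the granularity at
++ * which the hash strength gets re-evaluated.
++ */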
++static inline int hash_round_finished(void)
++{
++ if (scanned_virtual_pages > (uksm_pages_total >> 2)) {
++ scanned_virtual_pages = 0;
++ if (uksm_pages_scanned)
++ fully_scanned_round++;
++
++ return 1;
++ } else {
++ return 0;
++ }
++}
++
++#define UKSM_MMSEM_BATCH 5
++#define BUSY_RETRY 100
++
++/**
++ * uksm_do_scan() - the main worker function.
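++ *
++ * Walks every rung of the scan ladder, spending each rung's
++ * pages_to_scan quota on its slots, then does the end-of-round work:
++ * slot cleanup, ladder promotion and demotion, hash strength
++ * adjustment, and recomputation of per-rung quotas and the next sleep
++ * time.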
++ */
++static noinline void uksm_do_scan(void)
++{
++ struct vma_slot *slot, *iter;
++ struct mm_struct *busy_mm;
++	unsigned char round_finished, all_rungs_empty;
++ int i, err, mmsem_batch;
++ unsigned long pcost;
++ long long delta_exec;
++ unsigned long vpages, max_cpu_ratio;
++ unsigned long long start_time, end_time, scan_time;
++ unsigned int expected_jiffies;
++
++ might_sleep();
++
++ vpages = 0;
++
++ start_time = task_sched_runtime(current);
++ max_cpu_ratio = 0;
++ mmsem_batch = 0;
++
++ for (i = 0; i < SCAN_LADDER_SIZE;) {
++ struct scan_rung *rung = &uksm_scan_ladder[i];
++ unsigned long ratio;
++ int busy_retry;
++
++ if (!rung->pages_to_scan) {
++ i++;
++ continue;
++ }
++
++ if (!rung->vma_root.num) {
++ rung->pages_to_scan = 0;
++ i++;
++ continue;
++ }
++
++ ratio = rung_real_ratio(rung->cpu_ratio);
++ if (ratio > max_cpu_ratio)
++ max_cpu_ratio = ratio;
++
++ busy_retry = BUSY_RETRY;
++ /*
++		 * Do not consider rung_round_finished() here; just use up the
++ * rung->pages_to_scan quota.
++ */
++ while (rung->pages_to_scan && rung->vma_root.num &&
++ likely(!freezing(current))) {
++ int reset = 0;
++
++ slot = rung->current_scan;
++
++ BUG_ON(vma_fully_scanned(slot));
++
++ if (mmsem_batch)
++ err = 0;
++ else
++ err = try_down_read_slot_mmap_sem(slot);
++
++ if (err == -ENOENT) {
++rm_slot:
++ rung_rm_slot(slot);
++ continue;
++ }
++
++ busy_mm = slot->mm;
++
++ if (err == -EBUSY) {
++ /* skip other vmas on the same mm */
++ do {
++ reset = advance_current_scan(rung);
++ iter = rung->current_scan;
++ busy_retry--;
++ if (iter->vma->vm_mm != busy_mm ||
++ !busy_retry || reset)
++ break;
++ } while (1);
++
++ if (iter->vma->vm_mm != busy_mm) {
++ continue;
++ } else {
++					/* scan round finished */
++ break;
++ }
++ }
++
++ BUG_ON(!vma_can_enter(slot->vma));
++ if (uksm_test_exit(slot->vma->vm_mm)) {
++ mmsem_batch = 0;
++ mmap_read_unlock(slot->vma->vm_mm);
++ goto rm_slot;
++ }
++
++ if (mmsem_batch)
++ mmsem_batch--;
++ else
++ mmsem_batch = UKSM_MMSEM_BATCH;
++
++			/* OK, we have taken the mmap_sem, ready to scan */
++ scan_vma_one_page(slot);
++ rung->pages_to_scan--;
++ vpages++;
++
++ if (rung->current_offset + rung->step > slot->pages - 1
++ || vma_fully_scanned(slot)) {
++ mmap_read_unlock(slot->vma->vm_mm);
++ judge_slot(slot);
++ mmsem_batch = 0;
++ } else {
++ rung->current_offset += rung->step;
++ if (!mmsem_batch)
++ mmap_read_unlock(slot->vma->vm_mm);
++ }
++
++ busy_retry = BUSY_RETRY;
++ cond_resched();
++ }
++
++ if (mmsem_batch) {
++ mmap_read_unlock(slot->vma->vm_mm);
++ mmsem_batch = 0;
++ }
++
++ if (freezing(current))
++ break;
++
++ cond_resched();
++ }
++ end_time = task_sched_runtime(current);
++ delta_exec = end_time - start_time;
++
++ if (freezing(current))
++ return;
++
++ cleanup_vma_slots();
++ uksm_enter_all_slots();
++
++ round_finished = 1;
++	all_rungs_empty = 1;
++ for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++ struct scan_rung *rung = &uksm_scan_ladder[i];
++
++ if (rung->vma_root.num) {
++			all_rungs_empty = 0;
++ if (!rung_round_finished(rung))
++ round_finished = 0;
++ }
++ }
++
++	if (all_rungs_empty)
++ round_finished = 0;
++
++ if (round_finished) {
++ round_update_ladder();
++ uksm_eval_round++;
++
++ if (hash_round_finished() && rshash_adjust()) {
++ /* Reset the unstable root iff hash strength changed */
++ uksm_hash_round++;
++ root_unstable_tree = RB_ROOT;
++ free_all_tree_nodes(&unstable_tree_node_list);
++ }
++
++ /*
++ * A number of pages can hang around indefinitely on per-cpu
++ * pagevecs, raised page count preventing write_protect_page
++ * from merging them. Though it doesn't really matter much,
++ * it is puzzling to see some stuck in pages_volatile until
++ * other activity jostles them out, and they also prevented
++ * LTP's KSM test from succeeding deterministically; so drain
++ * them here (here rather than on entry to uksm_do_scan(),
++ * so we don't IPI too often when pages_to_scan is set low).
++ */
++ lru_add_drain_all();
++ }
++
++ if (vpages && delta_exec > 0) {
++ pcost = (unsigned long) delta_exec / vpages;
++ if (likely(uksm_ema_page_time))
++ uksm_ema_page_time = ema(pcost, uksm_ema_page_time);
++ else
++ uksm_ema_page_time = pcost;
++ }
++
++ uksm_calc_scan_pages();
++ uksm_sleep_real = uksm_sleep_jiffies;
++ /* in case of radical cpu bursts, apply the upper bound */
++ end_time = task_sched_runtime(current);
++ if (max_cpu_ratio && end_time > start_time) {
++ scan_time = end_time - start_time;
++ expected_jiffies = msecs_to_jiffies(
++ scan_time_to_sleep(scan_time, max_cpu_ratio));
++
++ if (expected_jiffies > uksm_sleep_real)
++ uksm_sleep_real = expected_jiffies;
++
++		/* We have a 1 second upper bound for responsiveness. */
++ if (jiffies_to_msecs(uksm_sleep_real) > MSEC_PER_SEC)
++ uksm_sleep_real = msecs_to_jiffies(1000);
++ }
++}
++
++static int ksmd_should_run(void)
++{
++ return uksm_run & UKSM_RUN_MERGE;
++}
++
++static int uksm_scan_thread(void *nothing)
++{
++ set_freezable();
++ set_user_nice(current, 5);
++
++ while (!kthread_should_stop()) {
++ mutex_lock(&uksm_thread_mutex);
++ if (ksmd_should_run())
++ uksm_do_scan();
++ mutex_unlock(&uksm_thread_mutex);
++
++ try_to_freeze();
++
++ if (ksmd_should_run()) {
++ schedule_timeout_interruptible(uksm_sleep_real);
++ uksm_sleep_times++;
++ } else {
++ wait_event_freezable(uksm_thread_wait,
++ ksmd_should_run() || kthread_should_stop());
++ }
++ }
++ return 0;
++}
++
++void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
++{
++ struct stable_node *stable_node;
++ struct node_vma *node_vma;
++ struct rmap_item *rmap_item;
++ int search_new_forks = 0;
++ unsigned long address;
++
++ VM_BUG_ON_PAGE(!PageKsm(page), page);
++ VM_BUG_ON_PAGE(!PageLocked(page), page);
++
++ stable_node = page_stable_node(page);
++ if (!stable_node)
++ return;
++again:
++ hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) {
++ hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) {
++ struct anon_vma *anon_vma = rmap_item->anon_vma;
++ struct anon_vma_chain *vmac;
++ struct vm_area_struct *vma;
++
++ cond_resched();
++ anon_vma_lock_read(anon_vma);
++ anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
++ 0, ULONG_MAX) {
++ cond_resched();
++ vma = vmac->vma;
++ address = get_rmap_addr(rmap_item);
++
++ if (address < vma->vm_start ||
++ address >= vma->vm_end)
++ continue;
++
++ if ((rmap_item->slot->vma == vma) ==
++ search_new_forks)
++ continue;
++
++ if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
++ continue;
++
++ if (!rwc->rmap_one(page, vma, address, rwc->arg)) {
++ anon_vma_unlock_read(anon_vma);
++ return;
++ }
++
++ if (rwc->done && rwc->done(page)) {
++ anon_vma_unlock_read(anon_vma);
++ return;
++ }
++ }
++ anon_vma_unlock_read(anon_vma);
++ }
++ }
++ if (!search_new_forks++)
++ goto again;
++}
++
++#ifdef CONFIG_MIGRATION
++/* Common ksm interface but may be specific to uksm */
++void ksm_migrate_page(struct page *newpage, struct page *oldpage)
++{
++ struct stable_node *stable_node;
++
++ VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
++ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
++ VM_BUG_ON(newpage->mapping != oldpage->mapping);
++
++ stable_node = page_stable_node(newpage);
++ if (stable_node) {
++ VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
++ stable_node->kpfn = page_to_pfn(newpage);
++ /*
++ * newpage->mapping was set in advance; now we need smp_wmb()
++ * to make sure that the new stable_node->kpfn is visible
++ * to get_ksm_page() before it can see that oldpage->mapping
++ * has gone stale (or that PageSwapCache has been cleared).
++ */
++ smp_wmb();
++ set_page_stable_node(oldpage, NULL);
++ }
++}
++#endif /* CONFIG_MIGRATION */
++
++#ifdef CONFIG_MEMORY_HOTREMOVE
++static struct stable_node *uksm_check_stable_tree(unsigned long start_pfn,
++ unsigned long end_pfn)
++{
++ struct rb_node *node;
++
++ for (node = rb_first(root_stable_treep); node; node = rb_next(node)) {
++ struct stable_node *stable_node;
++
++ stable_node = rb_entry(node, struct stable_node, node);
++ if (stable_node->kpfn >= start_pfn &&
++ stable_node->kpfn < end_pfn)
++ return stable_node;
++ }
++ return NULL;
++}
++
++static int uksm_memory_callback(struct notifier_block *self,
++ unsigned long action, void *arg)
++{
++ struct memory_notify *mn = arg;
++ struct stable_node *stable_node;
++
++ switch (action) {
++ case MEM_GOING_OFFLINE:
++ /*
++ * Keep it very simple for now: just lock out ksmd and
++ * MADV_UNMERGEABLE while any memory is going offline.
++ * mutex_lock_nested() is necessary because lockdep was alarmed
++ * that here we take uksm_thread_mutex inside notifier chain
++ * mutex, and later take notifier chain mutex inside
++ * uksm_thread_mutex to unlock it. But that's safe because both
++ * are inside mem_hotplug_mutex.
++ */
++ mutex_lock_nested(&uksm_thread_mutex, SINGLE_DEPTH_NESTING);
++ break;
++
++ case MEM_OFFLINE:
++ /*
++ * Most of the work is done by page migration; but there might
++ * be a few stable_nodes left over, still pointing to struct
++ * pages which have been offlined: prune those from the tree.
++ */
++ while ((stable_node = uksm_check_stable_tree(mn->start_pfn,
++ mn->start_pfn + mn->nr_pages)) != NULL)
++ remove_node_from_stable_tree(stable_node, 1, 1);
++ /* fallthrough */
++
++ case MEM_CANCEL_OFFLINE:
++ mutex_unlock(&uksm_thread_mutex);
++ break;
++ }
++ return NOTIFY_OK;
++}
++#endif /* CONFIG_MEMORY_HOTREMOVE */
++
++#ifdef CONFIG_SYSFS
++/*
++ * This all compiles without CONFIG_SYSFS, but is a waste of space.
++ */
++
++#define UKSM_ATTR_RO(_name) \
++ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
++#define UKSM_ATTR(_name) \
++ static struct kobj_attribute _name##_attr = \
++ __ATTR(_name, 0644, _name##_show, _name##_store)
++
++static ssize_t max_cpu_percentage_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%u\n", uksm_max_cpu_percentage);
++}
++
++static ssize_t max_cpu_percentage_store(struct kobject *kobj,
++ struct kobj_attribute *attr,
++ const char *buf, size_t count)
++{
++ unsigned long max_cpu_percentage;
++ int err;
++
++ err = kstrtoul(buf, 10, &max_cpu_percentage);
++ if (err || max_cpu_percentage > 100)
++ return -EINVAL;
++
++ if (max_cpu_percentage == 100)
++ max_cpu_percentage = 99;
++ else if (max_cpu_percentage < 10)
++ max_cpu_percentage = 10;
++
++ uksm_max_cpu_percentage = max_cpu_percentage;
++
++ return count;
++}
++UKSM_ATTR(max_cpu_percentage);
++
++static ssize_t sleep_millisecs_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%u\n", jiffies_to_msecs(uksm_sleep_jiffies));
++}
++
++static ssize_t sleep_millisecs_store(struct kobject *kobj,
++ struct kobj_attribute *attr,
++ const char *buf, size_t count)
++{
++ unsigned long msecs;
++ int err;
++
++ err = kstrtoul(buf, 10, &msecs);
++ if (err || msecs > MSEC_PER_SEC)
++ return -EINVAL;
++
++ uksm_sleep_jiffies = msecs_to_jiffies(msecs);
++ uksm_sleep_saved = uksm_sleep_jiffies;
++
++ return count;
++}
++UKSM_ATTR(sleep_millisecs);
++
++static ssize_t cpu_governor_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ int n = sizeof(uksm_cpu_governor_str) / sizeof(char *);
++ int i;
++
++ buf[0] = '\0';
++ for (i = 0; i < n ; i++) {
++ if (uksm_cpu_governor == i)
++ strcat(buf, "[");
++
++ strcat(buf, uksm_cpu_governor_str[i]);
++
++ if (uksm_cpu_governor == i)
++ strcat(buf, "]");
++
++ strcat(buf, " ");
++ }
++ strcat(buf, "\n");
++
++ return strlen(buf);
++}
++
++static inline void init_performance_values(void)
++{
++ int i;
++ struct scan_rung *rung;
++ struct uksm_cpu_preset_s *preset = uksm_cpu_preset + uksm_cpu_governor;
++
++ for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++ rung = uksm_scan_ladder + i;
++ rung->cpu_ratio = preset->cpu_ratio[i];
++ rung->cover_msecs = preset->cover_msecs[i];
++ }
++
++ uksm_max_cpu_percentage = preset->max_cpu;
++}
++
++static ssize_t cpu_governor_store(struct kobject *kobj,
++ struct kobj_attribute *attr,
++ const char *buf, size_t count)
++{
++ int n = sizeof(uksm_cpu_governor_str) / sizeof(char *);
++
++ for (n--; n >= 0 ; n--) {
++ if (!strncmp(buf, uksm_cpu_governor_str[n],
++ strlen(uksm_cpu_governor_str[n])))
++ break;
++ }
++
++ if (n < 0)
++ return -EINVAL;
++ else
++ uksm_cpu_governor = n;
++
++ init_performance_values();
++
++ return count;
++}
++UKSM_ATTR(cpu_governor);
++
++static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
++ char *buf)
++{
++ return sprintf(buf, "%u\n", uksm_run);
++}
++
++static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
++ const char *buf, size_t count)
++{
++ int err;
++ unsigned long flags;
++
++ err = kstrtoul(buf, 10, &flags);
++ if (err || flags > UINT_MAX)
++ return -EINVAL;
++ if (flags > UKSM_RUN_MERGE)
++ return -EINVAL;
++
++ mutex_lock(&uksm_thread_mutex);
++ if (uksm_run != flags)
++ uksm_run = flags;
++ mutex_unlock(&uksm_thread_mutex);
++
++ if (flags & UKSM_RUN_MERGE)
++ wake_up_interruptible(&uksm_thread_wait);
++
++ return count;
++}
++UKSM_ATTR(run);
++
++static ssize_t abundant_threshold_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%u\n", uksm_abundant_threshold);
++}
++
++static ssize_t abundant_threshold_store(struct kobject *kobj,
++ struct kobj_attribute *attr,
++ const char *buf, size_t count)
++{
++ int err;
++ unsigned long flags;
++
++ err = kstrtoul(buf, 10, &flags);
++ if (err || flags > 99)
++ return -EINVAL;
++
++ uksm_abundant_threshold = flags;
++
++ return count;
++}
++UKSM_ATTR(abundant_threshold);
++
++static ssize_t thrash_threshold_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%u\n", uksm_thrash_threshold);
++}
++
++static ssize_t thrash_threshold_store(struct kobject *kobj,
++ struct kobj_attribute *attr,
++ const char *buf, size_t count)
++{
++ int err;
++ unsigned long flags;
++
++ err = kstrtoul(buf, 10, &flags);
++ if (err || flags > 99)
++ return -EINVAL;
++
++ uksm_thrash_threshold = flags;
++
++ return count;
++}
++UKSM_ATTR(thrash_threshold);
++
++static ssize_t cpu_ratios_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ int i, size;
++ struct scan_rung *rung;
++ char *p = buf;
++
++ for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++ rung = &uksm_scan_ladder[i];
++
++ if (rung->cpu_ratio > 0)
++ size = sprintf(p, "%d ", rung->cpu_ratio);
++ else
++ size = sprintf(p, "MAX/%d ",
++ TIME_RATIO_SCALE / -rung->cpu_ratio);
++
++ p += size;
++ }
++
++ *p++ = '\n';
++ *p = '\0';
++
++ return p - buf;
++}
++
++static ssize_t cpu_ratios_store(struct kobject *kobj,
++ struct kobj_attribute *attr,
++ const char *buf, size_t count)
++{
++ int i, cpuratios[SCAN_LADDER_SIZE], err;
++ unsigned long value;
++ struct scan_rung *rung;
++	char *p, *end = NULL, *pbuf;
++	ssize_t ret = count;
++
++	/* +2 so the copied buffer is always NUL-terminated */
++	pbuf = kzalloc(count + 2, GFP_KERNEL);
++	if (!pbuf)
++		return -ENOMEM;
++
++	memcpy(pbuf, buf, count);
++	p = pbuf;
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		if (i != SCAN_LADDER_SIZE - 1) {
++			end = strchr(p, ' ');
++			if (!end) {
++				ret = -EINVAL;
++				goto out;
++			}
++
++			*end = '\0';
++		}
++
++		if (strstr(p, "MAX/")) {
++			p = strchr(p, '/') + 1;
++			err = kstrtoul(p, 10, &value);
++			if (err || value > TIME_RATIO_SCALE || !value) {
++				ret = -EINVAL;
++				goto out;
++			}
++
++			cpuratios[i] = -(int) (TIME_RATIO_SCALE / value);
++		} else {
++			err = kstrtoul(p, 10, &value);
++			if (err || value > TIME_RATIO_SCALE || !value) {
++				ret = -EINVAL;
++				goto out;
++			}
++
++			cpuratios[i] = value;
++		}
++
++		p = end + 1;
++	}
++
++	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++		rung = &uksm_scan_ladder[i];
++
++		rung->cpu_ratio = cpuratios[i];
++	}
++
++out:
++	kfree(pbuf);
++	return ret;
++}
++UKSM_ATTR(cpu_ratios);
++
++static ssize_t eval_intervals_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ int i, size;
++ struct scan_rung *rung;
++ char *p = buf;
++
++ for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++ rung = &uksm_scan_ladder[i];
++ size = sprintf(p, "%u ", rung->cover_msecs);
++ p += size;
++ }
++
++ *p++ = '\n';
++ *p = '\0';
++
++ return p - buf;
++}
++
++static ssize_t eval_intervals_store(struct kobject *kobj,
++ struct kobj_attribute *attr,
++ const char *buf, size_t count)
++{
++ int i, err;
++ unsigned long values[SCAN_LADDER_SIZE];
++ struct scan_rung *rung;
++	char *p, *end = NULL, *pbuf;
++	ssize_t ret = count;
++
++	pbuf = kzalloc(count + 2, GFP_KERNEL);
++	if (!pbuf)
++		return -ENOMEM;
++
++	memcpy(pbuf, buf, count);
++	p = pbuf;
++
++ for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++ if (i != SCAN_LADDER_SIZE - 1) {
++ end = strchr(p, ' ');
++ if (!end) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ *end = '\0';
++ }
++
++ err = kstrtoul(p, 10, &values[i]);
++ if (err) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ p = end + 1;
++ }
++
++ for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++ rung = &uksm_scan_ladder[i];
++
++ rung->cover_msecs = values[i];
++ }
++
++out:
++	kfree(pbuf);	/* p was advanced in the loop; free the base pointer */
++ return ret;
++}
++UKSM_ATTR(eval_intervals);
++
++static ssize_t ema_per_page_time_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%lu\n", uksm_ema_page_time);
++}
++UKSM_ATTR_RO(ema_per_page_time);
++
++static ssize_t pages_shared_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%lu\n", uksm_pages_shared);
++}
++UKSM_ATTR_RO(pages_shared);
++
++static ssize_t pages_sharing_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%lu\n", uksm_pages_sharing);
++}
++UKSM_ATTR_RO(pages_sharing);
++
++static ssize_t pages_unshared_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%lu\n", uksm_pages_unshared);
++}
++UKSM_ATTR_RO(pages_unshared);
++
++static ssize_t full_scans_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%llu\n", fully_scanned_round);
++}
++UKSM_ATTR_RO(full_scans);
++
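++/*
++ * The scanned-pages counter is 64-bit and can outgrow unsigned long on
++ * 32-bit machines; once it no longer fits, it is reported in the form
++ * "<mantissa> * 2^<base>".
++ */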
++static ssize_t pages_scanned_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ unsigned long base = 0;
++ u64 delta, ret;
++
++ if (pages_scanned_stored) {
++ base = pages_scanned_base;
++ ret = pages_scanned_stored;
++ delta = uksm_pages_scanned >> base;
++ if (CAN_OVERFLOW_U64(ret, delta)) {
++ ret >>= 1;
++ delta >>= 1;
++ base++;
++ ret += delta;
++ }
++ } else {
++ ret = uksm_pages_scanned;
++ }
++
++ while (ret > ULONG_MAX) {
++ ret >>= 1;
++ base++;
++ }
++
++ if (base)
++ return sprintf(buf, "%lu * 2^%lu\n", (unsigned long)ret, base);
++ else
++ return sprintf(buf, "%lu\n", (unsigned long)ret);
++}
++UKSM_ATTR_RO(pages_scanned);
++
++static ssize_t hash_strength_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%lu\n", hash_strength);
++}
++UKSM_ATTR_RO(hash_strength);
++
++static ssize_t sleep_times_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%llu\n", uksm_sleep_times);
++}
++UKSM_ATTR_RO(sleep_times);
++
++static struct attribute *uksm_attrs[] = {
++ &max_cpu_percentage_attr.attr,
++ &sleep_millisecs_attr.attr,
++ &cpu_governor_attr.attr,
++ &run_attr.attr,
++ &ema_per_page_time_attr.attr,
++ &pages_shared_attr.attr,
++ &pages_sharing_attr.attr,
++ &pages_unshared_attr.attr,
++ &full_scans_attr.attr,
++ &pages_scanned_attr.attr,
++ &hash_strength_attr.attr,
++ &sleep_times_attr.attr,
++ &thrash_threshold_attr.attr,
++ &abundant_threshold_attr.attr,
++ &cpu_ratios_attr.attr,
++ &eval_intervals_attr.attr,
++ NULL,
++};
++
++static struct attribute_group uksm_attr_group = {
++ .attrs = uksm_attrs,
++ .name = "uksm",
++};
++#endif /* CONFIG_SYSFS */
++
++static inline void init_scan_ladder(void)
++{
++ int i;
++ struct scan_rung *rung;
++
++ for (i = 0; i < SCAN_LADDER_SIZE; i++) {
++ rung = uksm_scan_ladder + i;
++ slot_tree_init_root(&rung->vma_root);
++ }
++
++ init_performance_values();
++ uksm_calc_scan_pages();
++}
++
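++/*
++ * Boot-time micro-benchmark: hash one page at full strength for about
++ * 100 jiffies, then run the same number of full-page compares, and set
++ * memcmp_cost to the price of a memcmp relative to hashing a single
++ * random sample -- the unit used by the benefit accounting.
++ */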
++static inline int cal_positive_negative_costs(void)
++{
++ struct page *p1, *p2;
++ unsigned char *addr1, *addr2;
++ unsigned long i, time_start, hash_cost;
++ unsigned long loopnum = 0;
++
++	/* IMPORTANT: volatile is needed to prevent over-optimization by gcc. */
++ volatile u32 hash;
++ volatile int ret;
++
++ p1 = alloc_page(GFP_KERNEL);
++ if (!p1)
++ return -ENOMEM;
++
++	p2 = alloc_page(GFP_KERNEL);
++	if (!p2) {
++		__free_page(p1);
++		return -ENOMEM;
++	}
++
++ addr1 = kmap_atomic(p1);
++ addr2 = kmap_atomic(p2);
++ memset(addr1, prandom_u32(), PAGE_SIZE);
++ memcpy(addr2, addr1, PAGE_SIZE);
++
++ /* make sure that the two pages differ in last byte */
++ addr2[PAGE_SIZE-1] = ~addr2[PAGE_SIZE-1];
++ kunmap_atomic(addr2);
++ kunmap_atomic(addr1);
++
++ time_start = jiffies;
++ while (jiffies - time_start < 100) {
++ for (i = 0; i < 100; i++)
++ hash = page_hash(p1, HASH_STRENGTH_FULL, 0);
++ loopnum += 100;
++ }
++ hash_cost = (jiffies - time_start);
++
++ time_start = jiffies;
++ for (i = 0; i < loopnum; i++)
++ ret = pages_identical_with_cost(p1, p2);
++ memcmp_cost = HASH_STRENGTH_FULL * (jiffies - time_start);
++ memcmp_cost /= hash_cost;
++ pr_info("UKSM: relative memcmp_cost = %lu "
++ "hash=%u cmp_ret=%d.\n",
++ memcmp_cost, hash, ret);
++
++ __free_page(p1);
++ __free_page(p2);
++ return 0;
++}
++
++static int init_zeropage_hash_table(void)
++{
++ struct page *page;
++ char *addr;
++ int i;
++
++ page = alloc_page(GFP_KERNEL);
++ if (!page)
++ return -ENOMEM;
++
++ addr = kmap_atomic(page);
++ memset(addr, 0, PAGE_SIZE);
++ kunmap_atomic(addr);
++
++ zero_hash_table = kmalloc_array(HASH_STRENGTH_MAX, sizeof(u32),
++ GFP_KERNEL);
++ if (!zero_hash_table)
++ return -ENOMEM;
++
++ for (i = 0; i < HASH_STRENGTH_MAX; i++)
++ zero_hash_table[i] = page_hash(page, i, 0);
++
++ __free_page(page);
++
++ return 0;
++}
++
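++/*
++ * random_nums is a Fisher-Yates shuffle of 0 .. HASH_STRENGTH_FULL - 1;
++ * hashing at strength s then samples the first s entries of this fixed
++ * random permutation of in-page offsets.
++ */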
++static inline int init_random_sampling(void)
++{
++ unsigned long i;
++
++ random_nums = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ if (!random_nums)
++ return -ENOMEM;
++
++ for (i = 0; i < HASH_STRENGTH_FULL; i++)
++ random_nums[i] = i;
++
++ for (i = 0; i < HASH_STRENGTH_FULL; i++) {
++ unsigned long rand_range, swap_index, tmp;
++
++ rand_range = HASH_STRENGTH_FULL - i;
++ swap_index = i + prandom_u32() % rand_range;
++ tmp = random_nums[i];
++ random_nums[i] = random_nums[swap_index];
++ random_nums[swap_index] = tmp;
++ }
++
++ rshash_state.state = RSHASH_NEW;
++ rshash_state.below_count = 0;
++ rshash_state.lookup_window_index = 0;
++
++ return cal_positive_negative_costs();
++}
++
++static int __init uksm_slab_init(void)
++{
++ rmap_item_cache = UKSM_KMEM_CACHE(rmap_item, 0);
++ if (!rmap_item_cache)
++ goto out;
++
++ stable_node_cache = UKSM_KMEM_CACHE(stable_node, 0);
++ if (!stable_node_cache)
++ goto out_free1;
++
++ node_vma_cache = UKSM_KMEM_CACHE(node_vma, 0);
++ if (!node_vma_cache)
++ goto out_free2;
++
++ vma_slot_cache = UKSM_KMEM_CACHE(vma_slot, 0);
++ if (!vma_slot_cache)
++ goto out_free3;
++
++ tree_node_cache = UKSM_KMEM_CACHE(tree_node, 0);
++ if (!tree_node_cache)
++ goto out_free4;
++
++ return 0;
++
++out_free4:
++ kmem_cache_destroy(vma_slot_cache);
++out_free3:
++ kmem_cache_destroy(node_vma_cache);
++out_free2:
++ kmem_cache_destroy(stable_node_cache);
++out_free1:
++ kmem_cache_destroy(rmap_item_cache);
++out:
++ return -ENOMEM;
++}
++
++static void __init uksm_slab_free(void)
++{
++ kmem_cache_destroy(stable_node_cache);
++ kmem_cache_destroy(rmap_item_cache);
++ kmem_cache_destroy(node_vma_cache);
++ kmem_cache_destroy(vma_slot_cache);
++ kmem_cache_destroy(tree_node_cache);
++}
++
++/* Common interface to ksm, but behaves differently from it. */
++int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
++ unsigned long end, int advice, unsigned long *vm_flags)
++{
++ int err;
++
++ switch (advice) {
++ case MADV_MERGEABLE:
++ return 0; /* just ignore the advice */
++
++ case MADV_UNMERGEABLE:
++ if (!(*vm_flags & VM_MERGEABLE) || !uksm_flags_can_scan(*vm_flags))
++ return 0; /* just ignore the advice */
++
++ if (vma->anon_vma) {
++ err = unmerge_uksm_pages(vma, start, end);
++ if (err)
++ return err;
++ }
++
++ uksm_remove_vma(vma);
++ *vm_flags &= ~VM_MERGEABLE;
++ break;
++ }
++
++ return 0;
++}
++
++/* Common interface to ksm, actually the same. */
++struct page *ksm_might_need_to_copy(struct page *page,
++ struct vm_area_struct *vma, unsigned long address)
++{
++ struct anon_vma *anon_vma = page_anon_vma(page);
++ struct page *new_page;
++
++ if (PageKsm(page)) {
++ if (page_stable_node(page))
++ return page; /* no need to copy it */
++ } else if (!anon_vma) {
++ return page; /* no need to copy it */
++ } else if (anon_vma->root == vma->anon_vma->root &&
++ page->index == linear_page_index(vma, address)) {
++ return page; /* still no need to copy it */
++ }
++ if (!PageUptodate(page))
++ return page; /* let do_swap_page report the error */
++
++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
++ if (new_page) {
++ copy_user_highpage(new_page, page, address, vma);
++
++ SetPageDirty(new_page);
++ __SetPageUptodate(new_page);
++ __SetPageLocked(new_page);
++ }
++
++ return new_page;
++}
++
++/* Copied from mm/ksm.c; required since kernel 5.1 */
++bool reuse_ksm_page(struct page *page,
++ struct vm_area_struct *vma,
++ unsigned long address)
++{
++#ifdef CONFIG_DEBUG_VM
++ if (WARN_ON(is_zero_pfn(page_to_pfn(page))) ||
++ WARN_ON(!page_mapped(page)) ||
++ WARN_ON(!PageLocked(page))) {
++ dump_page(page, "reuse_ksm_page");
++ return false;
++ }
++#endif
++
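++	/* only a stable-tree KSM page that is not in the swap cache can be reused in place */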
++ if (PageSwapCache(page) || !page_stable_node(page))
++ return false;
++ /* Prohibit parallel get_ksm_page() */
++ if (!page_ref_freeze(page, 1))
++ return false;
++
++ page_move_anon_rmap(page, vma);
++ page->index = linear_page_index(vma, address);
++ page_ref_unfreeze(page, 1);
++
++ return true;
++}
++
++static int __init uksm_init(void)
++{
++ struct task_struct *uksm_thread;
++ int err;
++
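++	/* default sleep between scan batches: 100 ms */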
++ uksm_sleep_jiffies = msecs_to_jiffies(100);
++ uksm_sleep_saved = uksm_sleep_jiffies;
++
++ slot_tree_init();
++ init_scan_ladder();
++
++ err = init_random_sampling();
++ if (err)
++		goto out_free1;
++
++ err = uksm_slab_init();
++ if (err)
++ goto out_free1;
++
++ err = init_zeropage_hash_table();
++ if (err)
++ goto out_free0;
++
++ uksm_thread = kthread_run(uksm_scan_thread, NULL, "uksmd");
++ if (IS_ERR(uksm_thread)) {
++ pr_err("uksm: creating kthread failed\n");
++ err = PTR_ERR(uksm_thread);
++ goto out_free;
++ }
++
++#ifdef CONFIG_SYSFS
++ err = sysfs_create_group(mm_kobj, &uksm_attr_group);
++ if (err) {
++ pr_err("uksm: register sysfs failed\n");
++ kthread_stop(uksm_thread);
++ goto out_free;
++ }
++#else
++	uksm_run = UKSM_RUN_MERGE; /* without sysfs the user cannot start it, so run by default */
++#endif /* CONFIG_SYSFS */
++
++#ifdef CONFIG_MEMORY_HOTREMOVE
++ /*
++ * Choose a high priority since the callback takes uksm_thread_mutex:
++ * later callbacks could only be taking locks which nest within that.
++ */
++ hotplug_memory_notifier(uksm_memory_callback, 100);
++#endif
++ return 0;
++
++out_free:
++ kfree(zero_hash_table);
++out_free0:
++ uksm_slab_free();
++out_free1:
++	/* random_nums may be allocated even when init_random_sampling() fails */
++	kfree(random_nums);
++ kfree(uksm_scan_ladder);
++ return err;
++}
++
++#ifdef MODULE
++subsys_initcall(uksm_init);
++#else
++late_initcall(uksm_init);
++#endif
++
+diff --git a/mm/vmstat.c b/mm/vmstat.c
+index 698bc0bc18d1..b7590f4944ca 100644
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -1216,6 +1216,9 @@ const char * const vmstat_text[] = {
+ "nr_shadow_call_stack",
+ #endif
+
++#ifdef CONFIG_UKSM
++ "nr_uksm_zero_pages",
++#endif
+ /* enum writeback_stat_item counters */
+ "nr_dirty_threshold",
+ "nr_dirty_background_threshold",
diff --git a/x86_64-desktop-gcc-omv-defconfig b/x86_64-desktop-gcc-omv-defconfig
index e5848ca..3e61da2 100644
--- a/x86_64-desktop-gcc-omv-defconfig
+++ b/x86_64-desktop-gcc-omv-defconfig
@@ -1016,6 +1016,8 @@ CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+CONFIG_UKSM=y
+# CONFIG_KSM_LEGACY is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_MEMORY_FAILURE=y
@@ -9597,6 +9599,7 @@ CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/x86_64-server-gcc-omv-defconfig b/x86_64-server-gcc-omv-defconfig
index 286ea6c..2e0b36d 100644
--- a/x86_64-server-gcc-omv-defconfig
+++ b/x86_64-server-gcc-omv-defconfig
@@ -1001,6 +1001,8 @@ CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+CONFIG_UKSM=y
+# CONFIG_KSM_LEGACY is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_MEMORY_FAILURE=y
@@ -9340,6 +9342,7 @@ CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/x86_64-znver-desktop-gcc-omv-defconfig b/x86_64-znver-desktop-gcc-omv-defconfig
index b9c6ff0..b0a69dc 100644
--- a/x86_64-znver-desktop-gcc-omv-defconfig
+++ b/x86_64-znver-desktop-gcc-omv-defconfig
@@ -1000,6 +1000,8 @@ CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+CONFIG_UKSM=y
+# CONFIG_KSM_LEGACY is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_MEMORY_FAILURE=y
@@ -9341,6 +9343,7 @@ CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems
diff --git a/x86_64-znver-server-gcc-omv-defconfig b/x86_64-znver-server-gcc-omv-defconfig
index 5077d16..272ad4a 100644
--- a/x86_64-znver-server-gcc-omv-defconfig
+++ b/x86_64-znver-server-gcc-omv-defconfig
@@ -1001,6 +1001,8 @@ CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_MMU_NOTIFIER=y
CONFIG_KSM=y
+CONFIG_UKSM=y
+# CONFIG_KSM_LEGACY is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_MEMORY_FAILURE=y
@@ -9339,6 +9341,7 @@ CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_LZX_XPRESS=y
+CONFIG_NTFS3_POSIX_ACL=y
# CONFIG_NTFS3_64BIT_CLUSTER is not set
# end of DOS/FAT/EXFAT/NT Filesystems