kernel/kernel-std/debian/patches/0056-Port-negative-dentries-limit-feature-from-3.10.patch
Peng Zhang 825266d5ac Update kernel to v5.10.189
This commit updates the kernel to 5.10.189 to fix the following CVE issues:
CVE-2023-4132: https://nvd.nist.gov/vuln/detail/CVE-2023-4132
CVE-2023-4004: https://nvd.nist.gov/vuln/detail/CVE-2023-4004
CVE-2023-20593: https://nvd.nist.gov/vuln/detail/CVE-2023-20593
CVE-2023-3863: https://nvd.nist.gov/vuln/detail/CVE-2023-3863
CVE-2023-31248: https://nvd.nist.gov/vuln/detail/CVE-2023-31248
CVE-2023-35001: https://nvd.nist.gov/vuln/detail/CVE-2023-35001
CVE-2023-3117: https://nvd.nist.gov/vuln/detail/CVE-2023-3117
CVE-2023-3611: https://nvd.nist.gov/vuln/detail/CVE-2023-3611
CVE-2023-3610: https://nvd.nist.gov/vuln/detail/CVE-2023-3610
CVE-2023-3776: https://nvd.nist.gov/vuln/detail/CVE-2023-3776
CVE-2023-3390: https://nvd.nist.gov/vuln/detail/CVE-2023-3390
CVE-2023-2898: https://nvd.nist.gov/vuln/detail/CVE-2023-2898

One of our source patches required a refresh against the new kernel
source. It was modified to add a missing parameter that is needed by
the new kernel:
       Port-negative-dentries-limit-feature-from-3.10.patch.

After the kernel upgrade, the function eth_hw_addr_set is provided by
linux-headers-5.10.0-6-common, but it is also already defined in the
following driver modules:
        i40e,i40e-cvl-4.10,iavf,iavf-cvl-4.10,ice,ice-cvl-4.10.
To avoid the redefinition conflict, we let the out-of-tree drivers
use the newly added in-tree version of the eth_hw_addr_set
function. This is achieved by undefining the NEED_ETH_HW_ADDR_SET
macro.

Verification:
- The kernel and out-of-tree modules build successfully for rt and std.
- The ISO builds successfully for rt and std.
- Installation succeeds on an AIO-DX lab with the rt kernel.
- The lab boots up successfully.
- Sanity testing was done by our test team and no regression
  defect was found.
- The cyclictest benchmark was also run on the StarlingX lab; the
  result is "samples: 259199999 avg: 1633 max: 8817 99.9999th
  percentile: 7612 overflows: 0". This is not a significant
  difference from 5.10.185 for avg and max.

Closes-Bug: 2029211

Change-Id: I107a0c0285ad2de39d56863cc5fed6273ad7fbd4
Signed-off-by: Peng Zhang <Peng.Zhang2@windriver.com>
2023-09-23 01:17:16 +08:00

302 lines
9.3 KiB
Diff

From 895e060d5da5a9f2d100495abafcb15871c2c486 Mon Sep 17 00:00:00 2001
From: Jim Somerville <jim.somerville@windriver.com>
Date: Fri, 14 Apr 2023 15:29:22 -0400
Subject: [PATCH] Port negative dentries limit feature from 3.10
This ports the Redhat feature forward from the 3.10 kernel version.
This feature allows one to specify a loose maximum of total memory
which is allowed to be used for negative dentries. This is done
via setting a sysctl variable which is used to calculate a
negative dentry limit for the system. Every 15 seconds a kworker
task will prune back the negative dentries that exceed the limit,
plus an extra 1% for hysteresis purposes.
Intent is that the feature code is kept as close to the 3.10 version
as possible.
Main differences from the 3.10 version of the code:
- count of dentries associated with a superblock is kept in a
different location, requiring a procedure call to obtain
- superblocks are now kept by node id and memcg, requiring
more calls into iterate_super
Signed-off-by: Jim Somerville <jim.somerville@windriver.com>
[zp: Adapted the patch for context and code changes.]
Signed-off-by: Peng Zhang <Peng.Zhang2@windriver.com>
---
fs/dcache.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++-
kernel/sysctl.c | 12 ++++
2 files changed, 188 insertions(+), 2 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 30dec5522..51bafdd49 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include <linux/list_lru.h>
+#include <linux/memcontrol.h>
#include "internal.h"
#include "mount.h"
@@ -119,6 +120,30 @@ struct dentry_stat_t dentry_stat = {
.age_limit = 45,
};
+/*
+ * dcache_negative_dentry_limit_sysctl:
+ * Backs the "negative-dentry-limit" sysctl. It expresses the limit on
+ * negative dentries in units of one-thousandth of total system memory.
+ * The default is 0, which means there is no limit. The sysctl table
+ * restricts the value to 0-100, so at most 10% of the total system
+ * memory can be allowed for negative dentries.
+ *
+ * negative_dentry_limit:
+ * The limit expressed as a number of dentries. It is recomputed by the
+ * sysctl handler when dcache_negative_dentry_limit_sysctl changes.
+ */
+static long negative_dentry_limit;
+int dcache_negative_dentry_limit_sysctl;
+EXPORT_SYMBOL_GPL(dcache_negative_dentry_limit_sysctl);
+
+/*
+ * A delayed work item periodically checks whether the negative dentry
+ * limit is exceeded. If so, the excess negative dentries are pruned.
+ */
+#define NEGATIVE_DENTRY_CHECK_PERIOD (15 * HZ) /* Check every 15s */
+static void prune_negative_dentry(struct work_struct *work);
+static DECLARE_DELAYED_WORK(prune_negative_dentry_work, prune_negative_dentry);
+
static DEFINE_PER_CPU(long, nr_dentry);
static DEFINE_PER_CPU(long, nr_dentry_unused);
static DEFINE_PER_CPU(long, nr_dentry_negative);
@@ -175,6 +200,43 @@ int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
}
#endif
+/*
+ * Sysctl proc handler for dcache_negative_dentry_limit_sysctl.
+ */
+int proc_dcache_negative_dentry_limit(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ /* Rough estimate of # of dentries allocated per page */
+ const unsigned int nr_dentry_page = PAGE_SIZE / sizeof(struct dentry);
+ int old = dcache_negative_dentry_limit_sysctl;
+ int ret;
+
+ ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+
+ if (!write || ret || (dcache_negative_dentry_limit_sysctl == old))
+ return ret; /* read, error, or unchanged value: nothing to recompute */
+
+ negative_dentry_limit = totalram_pages() * nr_dentry_page *
+ dcache_negative_dentry_limit_sysctl / 1000; /* sysctl unit: 1/1000 of RAM */
+
+ /*
+ * The periodic dentry pruner only runs when the limit is non-zero.
+ * The sysctl handler is the only trigger mechanism that can be
+ * used to start/stop the prune work reliably, so we do that here
+ * after calculating the new limit.
+ */
+ if (dcache_negative_dentry_limit_sysctl && !old)
+ schedule_delayed_work(&prune_negative_dentry_work, 0);
+
+ if (!dcache_negative_dentry_limit_sysctl && old)
+ cancel_delayed_work_sync(&prune_negative_dentry_work);
+
+ pr_info("Negative dentry limits = %ld\n", negative_dentry_limit);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(proc_dcache_negative_dentry_limit);
+
/*
* Compare 2 name strings, return 0 if they match, otherwise non-zero.
* The strings are both count bytes long, and count is non-zero.
@@ -1142,8 +1204,9 @@ void shrink_dentry_list(struct list_head *list)
}
}
-static enum lru_status dentry_lru_isolate(struct list_head *item,
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+static enum lru_status _dentry_lru_isolate(struct list_head *item,
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg,
+ bool negative_only)
{
struct list_head *freeable = arg;
struct dentry *dentry = container_of(item, struct dentry, d_lru);
@@ -1194,12 +1257,29 @@ static enum lru_status dentry_lru_isolate(struct list_head *item,
return LRU_ROTATE;
}
+ if (negative_only && !d_is_negative(dentry)) {
+ spin_unlock(&dentry->d_lock);
+ return LRU_SKIP;
+ }
+
d_lru_shrink_move(lru, dentry, freeable);
spin_unlock(&dentry->d_lock);
return LRU_REMOVED;
}
+static enum lru_status dentry_lru_isolate(struct list_head *item,
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+{
+ return _dentry_lru_isolate(item, lru, lru_lock, arg, false); /* no negative-only filter */
+}
+
+static enum lru_status dentry_lru_isolate_negative(struct list_head *item,
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+{
+ return _dentry_lru_isolate(item, lru, lru_lock, arg, true); /* non-negative dentries get LRU_SKIP */
+}
+
/**
* prune_dcache_sb - shrink the dcache
* @sb: superblock
@@ -1223,6 +1303,20 @@ long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc)
return freed;
}
+/*
+ * Does the same thing as prune_dcache_sb() but only gets rid of negative dentries.
+ */
+long prune_dcache_sb_negative(struct super_block *sb, struct shrink_control *sc)
+{
+ LIST_HEAD(dispose); /* dentries isolated from the LRU by the walk below */
+ long freed;
+
+ freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc,
+ dentry_lru_isolate_negative, &dispose);
+ shrink_dentry_list(&dispose);
+ return freed; /* value reported by list_lru_shrink_walk() */
+}
+
static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
@@ -1618,6 +1712,86 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
return D_WALK_CONTINUE;
}
+struct prune_negative_ctrl
+{
+ long prune_count; /* Running sum of prune_dcache_sb_negative() results */
+ int prune_percent; /* Each unit = 0.01% */
+
+ struct shrink_control shrink_ctl; /* gfp_mask/nid/memcg set by the work function */
+};
+
+/*
+ * iterate_supers() callback: prune negative dentries from one super block.
+ */
+static void prune_negative_one_sb(struct super_block *sb, void *arg)
+{
+ struct prune_negative_ctrl *ctrl = arg;
+ unsigned long count = list_lru_count_one(&sb->s_dentry_lru, ctrl->shrink_ctl.nid, ctrl->shrink_ctl.memcg);
+ long scan = (count * ctrl->prune_percent) / 10000; /* prune_percent is in 0.01% units */
+ struct shrink_control shrink_ctl = ctrl->shrink_ctl; /* local copy; nr_to_scan is set on it below */
+
+ if (scan) {
+ shrink_ctl.nr_to_scan = scan;
+ ctrl->prune_count += prune_dcache_sb_negative(sb, &shrink_ctl);
+ }
+}
+
+/*
+ * Delayed-work function: prune negative dentries over the limit, then requeue.
+ */
+static void prune_negative_dentry(struct work_struct *work)
+{
+ long count = get_nr_dentry_negative();
+ long limit = negative_dentry_limit;
+ struct prune_negative_ctrl ctrl;
+ unsigned long start;
+ struct mem_cgroup *memcg;
+ int nid;
+
+ if (!limit || count <= limit)
+ goto requeue_work;
+
+ /*
+ * Prune the excess plus an extra 1% (100 units of 0.01%) to increase
+ * the chance that the dentry count stays below the limit afterwards.
+ */
+ ctrl.prune_count = 0;
+ ctrl.prune_percent = ((count - limit) * 10000 / count) + 100;
+
+ ctrl.shrink_ctl.gfp_mask = GFP_KERNEL;
+ start = jiffies;
+
+
+ for_each_online_node(nid) {
+
+ ctrl.shrink_ctl.nid = nid;
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ ctrl.shrink_ctl.memcg = memcg;
+ /*
+ * iterate_supers() will take a read lock on the supers blocking
+ * concurrent umount.
+ */
+ iterate_supers(prune_negative_one_sb, &ctrl);
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+ }
+
+ /*
+ * Report negative dentry pruning stat.
+ */
+ pr_debug("%ld negative dentries freed in %u ms\n",
+ ctrl.prune_count, jiffies_to_msecs(jiffies - start));
+
+requeue_work:
+ /*
+ * The requeuing will get cancelled if there is a concurrent
+ * cancel_delayed_work_sync() call from user sysctl operation.
+ * That call will wait until this work finishes and cancel it.
+ */
+ schedule_delayed_work(&prune_negative_dentry_work,
+ NEGATIVE_DENTRY_CHECK_PERIOD);
+}
+
static void do_one_tree(struct dentry *dentry)
{
shrink_dcache_parent(dentry);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a45f0dd10..034517367 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -106,6 +106,9 @@
#if defined(CONFIG_SYSCTL)
+extern int dcache_negative_dentry_limit_sysctl;
+extern proc_handler proc_dcache_negative_dentry_limit;
+
/* Constants used for minimum and maximum */
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
@@ -3416,6 +3419,15 @@ static struct ctl_table fs_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
},
+ {
+ .procname = "negative-dentry-limit",
+ .data = &dcache_negative_dentry_limit_sysctl,
+ .maxlen = sizeof(dcache_negative_dentry_limit_sysctl),
+ .mode = 0644,
+ .proc_handler = proc_dcache_negative_dentry_limit,
+ .extra1 = SYSCTL_ZERO, /* int lower bound for proc_dointvec_minmax() */
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
{ }
};
--
2.30.2