
This commit updates the kernel to 5.10.189 to fix the following CVE issues: CVE-2023-4132: https://nvd.nist.gov/vuln/detail/CVE-2023-4132 CVE-2023-4004: https://nvd.nist.gov/vuln/detail/CVE-2023-4004 CVE-2023-20593: https://nvd.nist.gov/vuln/detail/CVE-2023-20593 CVE-2023-3863: https://nvd.nist.gov/vuln/detail/CVE-2023-3863 CVE-2023-31248: https://nvd.nist.gov/vuln/detail/CVE-2023-31248 CVE-2023-35001: https://nvd.nist.gov/vuln/detail/CVE-2023-35001 CVE-2023-3117: https://nvd.nist.gov/vuln/detail/CVE-2023-3117 CVE-2023-3611: https://nvd.nist.gov/vuln/detail/CVE-2023-3611 CVE-2023-3610: https://nvd.nist.gov/vuln/detail/CVE-2023-3610 CVE-2023-3776: https://nvd.nist.gov/vuln/detail/CVE-2023-3776 CVE-2023-3390: https://nvd.nist.gov/vuln/detail/CVE-2023-3390 CVE-2023-2898: https://nvd.nist.gov/vuln/detail/CVE-2023-2898 One of our source patches requires a refresh against the new kernel source. It was modified to add a missing parameter that is needed with the new kernel: Port-negative-dentries-limit-feature-from-3.10.patch. After upgrading the kernel, a new function eth_hw_addr_set is provided by linux-headers-5.10.0-6-common, but it is already defined in the following out-of-tree driver modules: i40e, i40e-cvl-4.10, iavf, iavf-cvl-4.10, ice, ice-cvl-4.10. To avoid the redefinition conflict, we let the out-of-tree drivers use the newly added in-tree version of the eth_hw_addr_set function. This is achieved by undefining the NEED_ETH_HW_ADDR_SET macro. Verification: - Building the kernel and the out-of-tree modules succeeds for rt and std. - Building the ISO succeeds for rt and std. - Installation succeeds on an AIO-DX lab with the rt kernel. - The lab boots up successfully. - Sanity testing was done by our test team and no regression defect was found. - The cyclictest benchmark was also run on the starlingx lab; the result is "samples: 259199999 avg: 1633 max: 8817 99.9999th percentile: 7612 overflows: 0", which is not a big difference from 5.10.185 for avg and max. 
Closes-Bug: 2029211 Change-Id: I107a0c0285ad2de39d56863cc5fed6273ad7fbd4 Signed-off-by: Peng Zhang <Peng.Zhang2@windriver.com>
302 lines
9.3 KiB
Diff
302 lines
9.3 KiB
Diff
From 895e060d5da5a9f2d100495abafcb15871c2c486 Mon Sep 17 00:00:00 2001
|
|
From: Jim Somerville <jim.somerville@windriver.com>
|
|
Date: Fri, 14 Apr 2023 15:29:22 -0400
|
|
Subject: [PATCH] Port negative dentries limit feature from 3.10
|
|
|
|
This ports the Redhat feature forward from the 3.10 kernel version.
|
|
|
|
This feature allows one to specify a loose maximum of total memory
|
|
which is allowed to be used for negative dentries. This is done
|
|
via setting a sysctl variable which is used to calculate a
|
|
negative dentry limit for the system. Every 15 seconds a kworker
|
|
task will prune back the negative dentries that exceed the limit,
|
|
plus an extra 1% for hysteresis purposes.
|
|
|
|
Intent is that the feature code is kept as close to the 3.10 version
|
|
as possible.
|
|
|
|
Main differences from the 3.10 version of the code:
|
|
- count of dentries associated with a superblock is kept in a
|
|
different location, requiring a procedure call to obtain
|
|
- superblocks are now kept by node id and memcg, requiring
|
|
more calls into iterate_super
|
|
|
|
Signed-off-by: Jim Somerville <jim.somerville@windriver.com>
|
|
[zp: Adapted the patch for context and code changes.]
|
|
Signed-off-by: Peng Zhang <Peng.Zhang2@windriver.com>
|
|
---
|
|
fs/dcache.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++-
|
|
kernel/sysctl.c | 12 ++++
|
|
2 files changed, 188 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/fs/dcache.c b/fs/dcache.c
|
|
index 30dec5522..51bafdd49 100644
|
|
--- a/fs/dcache.c
|
|
+++ b/fs/dcache.c
|
|
@@ -32,6 +32,7 @@
|
|
#include <linux/bit_spinlock.h>
|
|
#include <linux/rculist_bl.h>
|
|
#include <linux/list_lru.h>
|
|
+#include <linux/memcontrol.h>
|
|
#include "internal.h"
|
|
#include "mount.h"
|
|
|
|
@@ -119,6 +120,30 @@ struct dentry_stat_t dentry_stat = {
|
|
.age_limit = 45,
|
|
};
|
|
|
|
+/*
|
|
+ * dcache_negative_dentry_limit_sysctl:
|
|
+ * This is sysctl parameter "negative-dentry-limit" which specifies a
|
|
+ * limit for the number of negative dentries allowed in a system as a
|
|
+ * multiple of one-thousandth of the total system memory. The default
|
|
+ * is 0 which means there is no limit and the valid range is 0-100.
|
|
+ * So up to 10% of the total system memory can be used.
|
|
+ *
|
|
+ * negative_dentry_limit:
|
|
+ * The actual number of negative dentries allowed which is computed after
|
|
+ * the user changes dcache_negative_dentry_limit_sysctl.
|
|
+ */
|
|
+static long negative_dentry_limit;
|
|
+int dcache_negative_dentry_limit_sysctl;
|
|
+EXPORT_SYMBOL_GPL(dcache_negative_dentry_limit_sysctl);
|
|
+
|
|
+/*
|
|
+ * There will be a periodic check to see if the negative dentry limit
|
|
+ * is exceeded. If so, the excess negative dentries will be removed.
|
|
+ */
|
|
+#define NEGATIVE_DENTRY_CHECK_PERIOD (15 * HZ) /* Check every 15s */
|
|
+static void prune_negative_dentry(struct work_struct *work);
|
|
+static DECLARE_DELAYED_WORK(prune_negative_dentry_work, prune_negative_dentry);
|
|
+
|
|
static DEFINE_PER_CPU(long, nr_dentry);
|
|
static DEFINE_PER_CPU(long, nr_dentry_unused);
|
|
static DEFINE_PER_CPU(long, nr_dentry_negative);
|
|
@@ -175,6 +200,43 @@ int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
|
|
}
|
|
#endif
|
|
|
|
+/*
|
|
+ * Sysctl proc handler for dcache_negative_dentry_limit_sysctl.
|
|
+ */
|
|
+int proc_dcache_negative_dentry_limit(struct ctl_table *ctl, int write,
|
|
+ void __user *buffer, size_t *lenp,
|
|
+ loff_t *ppos)
|
|
+{
|
|
+ /* Rough estimate of # of dentries allocated per page */
|
|
+ const unsigned int nr_dentry_page = PAGE_SIZE / sizeof(struct dentry);
|
|
+ int old = dcache_negative_dentry_limit_sysctl;
|
|
+ int ret;
|
|
+
|
|
+ ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
|
|
+
|
|
+ if (!write || ret || (dcache_negative_dentry_limit_sysctl == old))
|
|
+ return ret;
|
|
+
|
|
+ negative_dentry_limit = totalram_pages() * nr_dentry_page *
|
|
+ dcache_negative_dentry_limit_sysctl / 1000;
|
|
+
|
|
+ /*
|
|
+ * The periodic dentry pruner only runs when the limit is non-zero.
|
|
+ * The sysctl handler is the only trigger mechanism that can be
|
|
+ * used to start/stop the prune work reliably, so we do that here
|
|
+ * after calculating the new limit.
|
|
+ */
|
|
+ if (dcache_negative_dentry_limit_sysctl && !old)
|
|
+ schedule_delayed_work(&prune_negative_dentry_work, 0);
|
|
+
|
|
+ if (!dcache_negative_dentry_limit_sysctl && old)
|
|
+ cancel_delayed_work_sync(&prune_negative_dentry_work);
|
|
+
|
|
+ pr_info("Negative dentry limits = %ld\n", negative_dentry_limit);
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(proc_dcache_negative_dentry_limit);
|
|
+
|
|
/*
|
|
* Compare 2 name strings, return 0 if they match, otherwise non-zero.
|
|
* The strings are both count bytes long, and count is non-zero.
|
|
@@ -1142,8 +1204,9 @@ void shrink_dentry_list(struct list_head *list)
|
|
}
|
|
}
|
|
|
|
-static enum lru_status dentry_lru_isolate(struct list_head *item,
|
|
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
|
|
+static enum lru_status _dentry_lru_isolate(struct list_head *item,
|
|
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg,
|
|
+ bool negative_only)
|
|
{
|
|
struct list_head *freeable = arg;
|
|
struct dentry *dentry = container_of(item, struct dentry, d_lru);
|
|
@@ -1194,12 +1257,29 @@ static enum lru_status dentry_lru_isolate(struct list_head *item,
|
|
return LRU_ROTATE;
|
|
}
|
|
|
|
+ if (negative_only && !d_is_negative(dentry)) {
|
|
+ spin_unlock(&dentry->d_lock);
|
|
+ return LRU_SKIP;
|
|
+ }
|
|
+
|
|
d_lru_shrink_move(lru, dentry, freeable);
|
|
spin_unlock(&dentry->d_lock);
|
|
|
|
return LRU_REMOVED;
|
|
}
|
|
|
|
+static enum lru_status dentry_lru_isolate(struct list_head *item,
|
|
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
|
|
+{
|
|
+ return _dentry_lru_isolate(item, lru, lru_lock, arg, false);
|
|
+}
|
|
+
|
|
+static enum lru_status dentry_lru_isolate_negative(struct list_head *item,
|
|
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
|
|
+{
|
|
+ return _dentry_lru_isolate(item, lru, lru_lock, arg, true);
|
|
+}
|
|
+
|
|
/**
|
|
* prune_dcache_sb - shrink the dcache
|
|
* @sb: superblock
|
|
@@ -1223,6 +1303,20 @@ long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc)
|
|
return freed;
|
|
}
|
|
|
|
+/**
|
|
+ * Does the same thing as prune_dcache_sb but only gets rid of negative dentries
|
|
+ */
|
|
+long prune_dcache_sb_negative(struct super_block *sb, struct shrink_control *sc)
|
|
+{
|
|
+ LIST_HEAD(dispose);
|
|
+ long freed;
|
|
+
|
|
+ freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc,
|
|
+ dentry_lru_isolate_negative, &dispose);
|
|
+ shrink_dentry_list(&dispose);
|
|
+ return freed;
|
|
+}
|
|
+
|
|
static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
|
|
struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
|
|
{
|
|
@@ -1618,6 +1712,86 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
|
|
return D_WALK_CONTINUE;
|
|
}
|
|
|
|
+struct prune_negative_ctrl
|
|
+{
|
|
+ long prune_count;
|
|
+ int prune_percent; /* Each unit = 0.01% */
|
|
+
|
|
+ struct shrink_control shrink_ctl;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Prune dentries from a super block.
|
|
+ */
|
|
+static void prune_negative_one_sb(struct super_block *sb, void *arg)
|
|
+{
|
|
+ struct prune_negative_ctrl *ctrl = arg;
|
|
+ unsigned long count = list_lru_count_one(&sb->s_dentry_lru, ctrl->shrink_ctl.nid, ctrl->shrink_ctl.memcg);
|
|
+ long scan = (count * ctrl->prune_percent) / 10000;
|
|
+ struct shrink_control shrink_ctl = ctrl->shrink_ctl;
|
|
+
|
|
+ if (scan) {
|
|
+ shrink_ctl.nr_to_scan = scan;
|
|
+ ctrl->prune_count += prune_dcache_sb_negative(sb, &shrink_ctl);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * A workqueue function to prune negative dentries.
|
|
+ */
|
|
+static void prune_negative_dentry(struct work_struct *work)
|
|
+{
|
|
+ long count = get_nr_dentry_negative();
|
|
+ long limit = negative_dentry_limit;
|
|
+ struct prune_negative_ctrl ctrl;
|
|
+ unsigned long start;
|
|
+ struct mem_cgroup *memcg;
|
|
+ int nid;
|
|
+
|
|
+ if (!limit || count <= limit)
|
|
+ goto requeue_work;
|
|
+
|
|
+ /*
|
|
+ * Add an extra 1% as a minimum and to increase the chance
|
|
+ * that the post-operation dentry count stays below the limit.
|
|
+ */
|
|
+ ctrl.prune_count = 0;
|
|
+ ctrl.prune_percent = ((count - limit) * 10000 / count) + 100;
|
|
+
|
|
+ ctrl.shrink_ctl.gfp_mask = GFP_KERNEL;
|
|
+ start = jiffies;
|
|
+
|
|
+
|
|
+ for_each_online_node(nid) {
|
|
+
|
|
+ ctrl.shrink_ctl.nid = nid;
|
|
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
|
+ do {
|
|
+ ctrl.shrink_ctl.memcg = memcg;
|
|
+ /*
|
|
+ * iterate_supers() will take a read lock on the supers blocking
|
|
+ * concurrent umount.
|
|
+ */
|
|
+ iterate_supers(prune_negative_one_sb, &ctrl);
|
|
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Report negative dentry pruning stat.
|
|
+ */
|
|
+ pr_debug("%ld negative dentries freed in %d ms\n",
|
|
+ ctrl.prune_count, jiffies_to_msecs(jiffies - start));
|
|
+
|
|
+requeue_work:
|
|
+ /*
|
|
+ * The requeuing will get cancelled if there is a concurrent
|
|
+ * cancel_delayed_work_sync() call from user sysctl operation.
|
|
+ * That call will wait until this work finishes and cancel it.
|
|
+ */
|
|
+ schedule_delayed_work(&prune_negative_dentry_work,
|
|
+ NEGATIVE_DENTRY_CHECK_PERIOD);
|
|
+}
|
|
+
|
|
static void do_one_tree(struct dentry *dentry)
|
|
{
|
|
shrink_dcache_parent(dentry);
|
|
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
|
index a45f0dd10..034517367 100644
|
|
--- a/kernel/sysctl.c
|
|
+++ b/kernel/sysctl.c
|
|
@@ -106,6 +106,9 @@
|
|
|
|
#if defined(CONFIG_SYSCTL)
|
|
|
|
+extern int dcache_negative_dentry_limit_sysctl;
|
|
+extern proc_handler proc_dcache_negative_dentry_limit;
|
|
+
|
|
/* Constants used for minimum and maximum */
|
|
#ifdef CONFIG_LOCKUP_DETECTOR
|
|
static int sixty = 60;
|
|
@@ -3416,6 +3419,15 @@ static struct ctl_table fs_table[] = {
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = SYSCTL_ONE,
|
|
},
|
|
+ {
|
|
+ .procname = "negative-dentry-limit",
|
|
+ .data = &dcache_negative_dentry_limit_sysctl,
|
|
+ .maxlen = sizeof(dcache_negative_dentry_limit_sysctl),
|
|
+ .mode = 0644,
|
|
+ .proc_handler = proc_dcache_negative_dentry_limit,
|
|
+ .extra1 = &zero_ul,
|
|
+ .extra2 = SYSCTL_ONE_HUNDRED,
|
|
+ },
|
|
{ }
|
|
};
|
|
|
|
--
|
|
2.30.2
|
|
|