3cd12006bb
Signed-off-by: Dean Troyer <dtroyer@gmail.com>
169 lines
6.0 KiB
Diff
169 lines
6.0 KiB
Diff
From 2e4df25b1cd4cd67877e1d5ad8cb48776b678b2d Mon Sep 17 00:00:00 2001
|
|
Message-Id: <2e4df25b1cd4cd67877e1d5ad8cb48776b678b2d.1507911923.git.Jim.Somerville@windriver.com>
|
|
In-Reply-To: <0bd66eb88c950d172a7dcefc61cb2e89b89cacce.1507911922.git.Jim.Somerville@windriver.com>
|
|
References: <0bd66eb88c950d172a7dcefc61cb2e89b89cacce.1507911922.git.Jim.Somerville@windriver.com>
|
|
From: Thomas Gleixner <tglx@linutronix.de>
|
|
Date: Tue, 26 May 2015 22:50:35 +0000
|
|
Subject: [PATCH 26/26] timer: Minimize nohz off overhead
|
|
|
|
If nohz is disabled on the kernel command line, the [hr]timer code
|
|
still calls wake_up_nohz_cpu() and tick_nohz_full_cpu(), a pretty
|
|
pointless exercise. Cache nohz_active in [hr]timer per cpu bases and
|
|
avoid the overhead.
|
|
|
|
Before:
|
|
48.10% hog [.] main
|
|
15.25% [kernel] [k] _raw_spin_lock_irqsave
|
|
9.76% [kernel] [k] _raw_spin_unlock_irqrestore
|
|
6.50% [kernel] [k] mod_timer
|
|
6.44% [kernel] [k] lock_timer_base.isra.38
|
|
3.87% [kernel] [k] detach_if_pending
|
|
3.80% [kernel] [k] del_timer
|
|
2.67% [kernel] [k] internal_add_timer
|
|
1.33% [kernel] [k] __internal_add_timer
|
|
0.73% [kernel] [k] timerfn
|
|
0.54% [kernel] [k] wake_up_nohz_cpu
|
|
|
|
After:
|
|
48.73% hog [.] main
|
|
15.36% [kernel] [k] _raw_spin_lock_irqsave
|
|
9.77% [kernel] [k] _raw_spin_unlock_irqrestore
|
|
6.61% [kernel] [k] lock_timer_base.isra.38
|
|
6.42% [kernel] [k] mod_timer
|
|
3.90% [kernel] [k] detach_if_pending
|
|
3.76% [kernel] [k] del_timer
|
|
2.41% [kernel] [k] internal_add_timer
|
|
1.39% [kernel] [k] __internal_add_timer
|
|
0.76% [kernel] [k] timerfn
|
|
|
|
We probably should have a cached value for nohz full in the per cpu
|
|
bases as well to avoid the cpumask check. The base cache line is hot
|
|
already; the cpumask is not necessarily hot.
|
|
|
|
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
|
Cc: Peter Zijlstra <peterz@infradead.org>
|
|
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
|
|
Cc: Frederic Weisbecker <fweisbec@gmail.com>
|
|
Cc: Eric Dumazet <edumazet@google.com>
|
|
Cc: Viresh Kumar <viresh.kumar@linaro.org>
|
|
Cc: John Stultz <john.stultz@linaro.org>
|
|
Cc: Joonwoo Park <joonwoop@codeaurora.org>
|
|
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
|
|
Link: http://lkml.kernel.org/r/20150526224512.207378134@linutronix.de
|
|
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
|
Signed-off-by: Alex Kozyrev <alex.kozyrev@windriver.com>
|
|
Signed-off-by: Jim Somerville <Jim.Somerville@windriver.com>
|
|
---
|
|
include/linux/hrtimer.h | 2 ++
|
|
kernel/time/tick-internal.h | 4 ++--
|
|
kernel/time/tick-sched.c | 2 +-
|
|
kernel/timer.c | 14 +++++++++++---
|
|
4 files changed, 16 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
|
|
index 37b5233..f93bcf7 100644
|
|
--- a/include/linux/hrtimer.h
|
|
+++ b/include/linux/hrtimer.h
|
|
@@ -176,6 +176,7 @@ enum hrtimer_base_type {
|
|
* Note that in RHEL7 clock_was_set is upstream's
|
|
* clock_was_set_seq (KABI).
|
|
* @migration_enabled: The migration of hrtimers to other cpus is enabled
|
|
+ * @nohz_active: The nohz functionality is enabled
|
|
* @expires_next: absolute time of the next event which was scheduled
|
|
* via clock_set_next_event()
|
|
* @hres_active: State of high resolution mode
|
|
@@ -191,6 +192,7 @@ struct hrtimer_cpu_base {
|
|
unsigned int active_bases;
|
|
unsigned int clock_was_set; /* clock_was_set_seq */
|
|
bool migration_enabled;
|
|
+ bool nohz_active;
|
|
#ifdef CONFIG_HIGH_RES_TIMERS
|
|
ktime_t expires_next;
|
|
int hres_active;
|
|
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
|
|
index 3ebdda4..13468bd 100644
|
|
--- a/kernel/time/tick-internal.h
|
|
+++ b/kernel/time/tick-internal.h
|
|
@@ -173,9 +173,9 @@ extern unsigned long tick_nohz_active;
|
|
#endif
|
|
|
|
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
|
|
-extern void timers_update_migration(void);
|
|
+extern void timers_update_migration(bool update_nohz);
|
|
#else
|
|
-static inline void timers_update_migration(void) { }
|
|
+static inline void timers_update_migration(bool update_nohz) { }
|
|
#endif
|
|
|
|
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
|
|
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
|
|
index 6c92920..3ccc18c 100644
|
|
--- a/kernel/time/tick-sched.c
|
|
+++ b/kernel/time/tick-sched.c
|
|
@@ -980,7 +980,7 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
|
|
ts->nohz_mode = mode;
|
|
/* One update is enough */
|
|
if (!test_and_set_bit(0, &tick_nohz_active))
|
|
- timers_update_migration();
|
|
+ timers_update_migration(true);
|
|
}
|
|
|
|
/**
|
|
diff --git a/kernel/timer.c b/kernel/timer.c
|
|
index 1c40b95..d14e37b 100644
|
|
--- a/kernel/timer.c
|
|
+++ b/kernel/timer.c
|
|
@@ -88,6 +88,7 @@ struct tvec_base {
|
|
unsigned long active_timers;
|
|
int cpu;
|
|
bool migration_enabled;
|
|
+ bool nohz_active;
|
|
struct tvec_root tv1;
|
|
struct tvec tv2;
|
|
struct tvec tv3;
|
|
@@ -103,7 +104,7 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
|
|
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
|
|
unsigned int sysctl_timer_migration = 1;
|
|
|
|
-void timers_update_migration(void)
|
|
+void timers_update_migration(bool update_nohz)
|
|
{
|
|
bool on = sysctl_timer_migration && tick_nohz_active;
|
|
unsigned int cpu;
|
|
@@ -119,6 +120,10 @@ void timers_update_migration(void)
|
|
tvec_base->migration_enabled = on;
|
|
hrtimer_base = &per_cpu(hrtimer_bases, cpu);
|
|
hrtimer_base->migration_enabled = on;
|
|
+ if (!update_nohz)
|
|
+ continue;
|
|
+ tvec_base->nohz_active = true;
|
|
+ hrtimer_base->nohz_active = true;
|
|
}
|
|
}
|
|
|
|
@@ -132,7 +137,7 @@ int timer_migration_handler(struct ctl_table *table, int write,
|
|
mutex_lock(&mutex);
|
|
ret = proc_dointvec(table, write, buffer, lenp, ppos);
|
|
if (!ret && write)
|
|
- timers_update_migration();
|
|
+ timers_update_migration(false);
|
|
mutex_unlock(&mutex);
|
|
return ret;
|
|
}
|
|
@@ -482,8 +487,11 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
|
|
* require special care against races with idle_cpu(), lets deal
|
|
* with that later.
|
|
*/
|
|
- if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu))
|
|
+ if (base->nohz_active) {
|
|
+ if (!tbase_get_deferrable(base) ||
|
|
+ tick_nohz_full_cpu(base->cpu))
|
|
wake_up_nohz_cpu(base->cpu);
|
|
+ }
|
|
}
|
|
|
|
#ifdef CONFIG_TIMER_STATS
|
|
--
|
|
1.8.3.1
|
|
|