From 2e4df25b1cd4cd67877e1d5ad8cb48776b678b2d Mon Sep 17 00:00:00 2001 Message-Id: <2e4df25b1cd4cd67877e1d5ad8cb48776b678b2d.1507911923.git.Jim.Somerville@windriver.com> In-Reply-To: <0bd66eb88c950d172a7dcefc61cb2e89b89cacce.1507911922.git.Jim.Somerville@windriver.com> References: <0bd66eb88c950d172a7dcefc61cb2e89b89cacce.1507911922.git.Jim.Somerville@windriver.com> From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:35 +0000 Subject: [PATCH 26/26] timer: Minimize nohz off overhead If nohz is disabled on the kernel command line the [hr]timer code still calls wake_up_nohz_cpu() and tick_nohz_full_cpu(), a pretty pointless exercise. Cache nohz_active in [hr]timer per cpu bases and avoid the overhead. Before: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu After: 48.73% hog [.] main 15.36% [kernel] [k] _raw_spin_lock_irqsave 9.77% [kernel] [k] _raw_spin_unlock_irqrestore 6.61% [kernel] [k] lock_timer_base.isra.38 6.42% [kernel] [k] mod_timer 3.90% [kernel] [k] detach_if_pending 3.76% [kernel] [k] del_timer 2.41% [kernel] [k] internal_add_timer 1.39% [kernel] [k] __internal_add_timer 0.76% [kernel] [k] timerfn We probably should have a cached value for nohz full in the per cpu bases as well to avoid the cpumask check. The base cache line is hot already, the cpumask not necessarily. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224512.207378134@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Alex Kozyrev Signed-off-by: Jim Somerville --- include/linux/hrtimer.h | 2 ++ kernel/time/tick-internal.h | 4 ++-- kernel/time/tick-sched.c | 2 +- kernel/timer.c | 14 +++++++++++--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 37b5233..f93bcf7 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -176,6 +176,7 @@ enum hrtimer_base_type { * Note that in RHEL7 clock_was_set is upstream's * clock_was_set_seq (KABI). * @migration_enabled: The migration of hrtimers to other cpus is enabled + * @nohz_active: The nohz functionality is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -191,6 +192,7 @@ struct hrtimer_cpu_base { unsigned int active_bases; unsigned int clock_was_set; /* clock_was_set_seq */ bool migration_enabled; + bool nohz_active; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 3ebdda4..13468bd 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -173,9 +173,9 @@ extern unsigned long tick_nohz_active; #endif #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void timers_update_migration(void); +extern void timers_update_migration(bool update_nohz); #else -static inline void timers_update_migration(void) { } +static inline void timers_update_migration(bool update_nohz) { } #endif DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6c92920..3ccc18c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -980,7 +980,7 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode) ts->nohz_mode = mode; /* One update is enough */ if (!test_and_set_bit(0, &tick_nohz_active)) - timers_update_migration(); + timers_update_migration(true); } /** diff --git a/kernel/timer.c b/kernel/timer.c index 1c40b95..d14e37b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -88,6 +88,7 @@ struct tvec_base { unsigned long active_timers; int cpu; bool migration_enabled; + bool nohz_active; struct tvec_root tv1; struct tvec tv2; struct tvec tv3; @@ -103,7 +104,7 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) unsigned int sysctl_timer_migration = 1; -void timers_update_migration(void) +void timers_update_migration(bool update_nohz) { bool on = sysctl_timer_migration && tick_nohz_active; unsigned int cpu; @@ -119,6 +120,10 @@ void timers_update_migration(void) tvec_base->migration_enabled = on; hrtimer_base = &per_cpu(hrtimer_bases, cpu); hrtimer_base->migration_enabled = on; + if (!update_nohz) + continue; + tvec_base->nohz_active = true; + hrtimer_base->nohz_active = true; } } @@ -132,7 +137,7 @@ int timer_migration_handler(struct ctl_table *table, int write, mutex_lock(&mutex); ret = proc_dointvec(table, write, buffer, lenp, ppos); if (!ret && write) - timers_update_migration(); + timers_update_migration(false); mutex_unlock(&mutex); return ret; } @@ -482,8 +487,11 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) * require special care against races with idle_cpu(), lets deal * with that later. */ - if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu)) + if (base->nohz_active) { + if (!tbase_get_deferrable(base) || + tick_nohz_full_cpu(base->cpu)) wake_up_nohz_cpu(base->cpu); + } } #ifdef CONFIG_TIMER_STATS -- 1.8.3.1