From a15588101329965ad3974bd571a9207d6a5e154a Mon Sep 17 00:00:00 2001 Message-Id: In-Reply-To: References: From: Chris Friesen Date: Tue, 24 Nov 2015 16:27:28 -0500 Subject: [PATCH 04/32] affine compute kernel threads This is a kernel enhancement to configure the cpu affinity of kernel threads via kernel boot option kthread_cpus=. The compute kickstart file and compute-huge.sh scripts will update grub with the new option. With kthread_cpus specified, the cpumask is immediately applied upon thread launch. This does not affect kernel threads that specify cpu and node. Note: this is based off of Christoph Lameter's patch at https://lwn.net/Articles/565932/ with the only difference being the kernel parameter changed from kthread to kthread_cpus. Signed-off-by: Christoph Lameter Signed-off-by: Chris Friesen [VT: The existing "isolcpus" kernel bootarg, cgroup/cpuset, and taskset might provide some way to have cpu isolation. However, none of them satisfies the requirements. Replacing spaces with tabs. Combine two calls of set_cpus_allowed_ptr() in kernel_init_freeable() in init/main.c into one. Performed tests] Signed-off-by: Vu Tran Signed-off-by: Jim Somerville --- Documentation/kernel-parameters.txt | 10 ++++++++++ include/linux/cpumask.h | 2 ++ init/main.c | 6 ++---- kernel/cpu.c | 13 +++++++++++++ kernel/kmod.c | 3 +++ kernel/kthread.c | 4 ++-- 6 files changed, 32 insertions(+), 6 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 02cfdf6..4eeda61 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1551,6 +1551,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. kpti [X86-64] Enable kernel page table isolation. + kthread_cpus= [KNL, SMP] Only run kernel threads on the specified + list of processors. 
The kernel will start threads + on the indicated processors only (unless there + are specific reasons to run a thread with + different affinities). This can be used to make + init start on certain processors and also to + control where kmod and other user space threads + are being spawned. Allows keeping kernel threads + away from certain cores unless absolutely necessary. + kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. Default is 0 (don't ignore, but inject #GP) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ee335c6..46ae77a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -52,6 +52,7 @@ extern int nr_cpu_ids; * cpu_present_mask - has bit 'cpu' set iff cpu is populated * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration + * cpu_kthread_mask - has bit 'cpu' set iff general kernel threads allowed * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * @@ -88,6 +89,7 @@ extern const struct cpumask *const cpu_possible_mask; extern const struct cpumask *const cpu_online_mask; extern const struct cpumask *const cpu_present_mask; extern const struct cpumask *const cpu_active_mask; +extern const struct cpumask *const cpu_kthread_mask; #if NR_CPUS > 1 #define num_online_cpus() cpumask_weight(cpu_online_mask) diff --git a/init/main.c b/init/main.c index 2e4ecd4..6e265d0 100644 --- a/init/main.c +++ b/init/main.c @@ -961,10 +961,6 @@ static noinline void __init kernel_init_freeable(void) * init can allocate pages on any node */ set_mems_allowed(node_states[N_MEMORY]); - /* - * init can run on any cpu. 
- */ - set_cpus_allowed_ptr(current, cpu_all_mask); cad_pid = task_pid(current); @@ -980,6 +976,8 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); + set_cpus_allowed_ptr(current, cpu_kthread_mask); + /* Open the /dev/console on the rootfs, this should never fail */ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) pr_err("Warning: unable to open an initial console.\n"); diff --git a/kernel/cpu.c b/kernel/cpu.c index 6fe84e4..325a47a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1329,6 +1329,19 @@ static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly; const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits); EXPORT_SYMBOL(cpu_active_mask); +static DECLARE_BITMAP(cpu_kthread_bits, CONFIG_NR_CPUS) __read_mostly + = CPU_BITS_ALL; +const struct cpumask *const cpu_kthread_mask = to_cpumask(cpu_kthread_bits); +EXPORT_SYMBOL(cpu_kthread_mask); + +static int __init kthread_setup(char *str) +{ + cpulist_parse(str, (struct cpumask *)&cpu_kthread_bits); + return 1; +} +__setup("kthread_cpus=", kthread_setup); + + void set_cpu_possible(unsigned int cpu, bool possible) { if (possible) diff --git a/kernel/kmod.c b/kernel/kmod.c index 86ab754..4bf584b 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -204,6 +204,9 @@ static int ____call_usermodehelper(void *data) flush_signal_handlers(current, 1); spin_unlock_irq(&current->sighand->siglock); + /* We can run only where init is allowed to run. */ + set_cpus_allowed_ptr(current, cpu_kthread_mask); + /* * Our parent is keventd, which runs with elevated scheduling priority. * Avoid propagating that into the userspace child. diff --git a/kernel/kthread.c b/kernel/kthread.c index 703d910..7ea32eb 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -284,7 +284,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), * The kernel thread should not inherit these properties. 
*/ sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param); - set_cpus_allowed_ptr(create.result, cpu_all_mask); + set_cpus_allowed_ptr(create.result, cpu_kthread_mask); } return create.result; } @@ -454,7 +454,7 @@ int kthreadd(void *unused) /* Setup a clean context for our children to inherit. */ set_task_comm(tsk, "kthreadd"); ignore_signals(tsk); - set_cpus_allowed_ptr(tsk, cpu_all_mask); + set_cpus_allowed_ptr(tsk, cpu_kthread_mask); set_mems_allowed(node_states[N_MEMORY]); current->flags |= PF_NOFREEZE; -- 1.8.3.1