GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/acpi/acpi_pad.c
/*
 * acpi_pad.c ACPI Processor Aggregator Driver
 *
 * Copyright (c) 2009, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
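
/*
 * The ACPI Processor Aggregator device (HID ACPI000C) lets firmware ask
 * the OS to take a number of CPUs out of service, typically for power or
 * thermal reasons.  This driver answers such requests by spawning one
 * high-priority kernel thread per requested CPU; each thread occupies a
 * CPU and parks it in its deepest MWAIT C-state, so the core draws
 * near-idle power while nominally staying busy.
 */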

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/clockchips.h>
#include <linux/slab.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
#include <asm/mwait.h>

#define ACPI_PROCESSOR_AGGREGATOR_CLASS	"acpi_pad"
#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
static DEFINE_MUTEX(isolated_cpus_lock);

static unsigned long power_saving_mwait_eax;

static unsigned char tsc_detected_unstable;
static unsigned char tsc_marked_unstable;
static unsigned char lapic_detected_unstable;
static unsigned char lapic_marked_unstable;

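/*
 * Probe CPUID leaf 5 (MONITOR/MWAIT) for the deepest supported C-state
 * and sub-state, and precompute the MWAIT hint for it: bits 7:4 of EAX
 * select the C-state, bits 3:0 the sub-state.  Also record whether the
 * TSC and the local APIC timer keep ticking in deep C-states on this
 * CPU, so the idle threads can compensate later.
 */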
static void power_saving_mwait_init(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	if (!boot_cpu_has(X86_FEATURE_MWAIT))
		return;
	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
		return;

	edx >>= MWAIT_SUBSTATE_SIZE;
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	power_saving_mwait_eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		(highest_subcstate - 1);

#if defined(CONFIG_X86)
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		/*
		 * AMD Fam10h TSC will tick in all
		 * C/P/S0/S1 states when this bit is set.
		 */
		if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			tsc_detected_unstable = 1;
		if (!boot_cpu_has(X86_FEATURE_ARAT))
			lapic_detected_unstable = 1;
		break;
	default:
		/* TSC & LAPIC could halt in idle */
		tsc_detected_unstable = 1;
		lapic_detected_unstable = 1;
	}
#endif
}

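/*
 * Round-robin state: cpu_weight counts how often each CPU has hosted a
 * power-saving thread, tsk_in_cpu maps each thread to the CPU it
 * currently occupies, and pad_busy_cpus_bits marks occupied CPUs.
 * Rotating the threads avoids pinning the same cores down forever.
 */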
static unsigned long cpu_weight[NR_CPUS];
static int tsk_in_cpu[NR_CPUS] = {[0 ... NR_CPUS-1] = -1};
static DECLARE_BITMAP(pad_busy_cpus_bits, NR_CPUS);
static void round_robin_cpu(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_var_t tmp;
	int cpu;
	unsigned long min_weight = -1;
	unsigned long uninitialized_var(preferred_cpu);

	if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
		return;

	mutex_lock(&isolated_cpus_lock);
	cpumask_clear(tmp);
	for_each_cpu(cpu, pad_busy_cpus)
		cpumask_or(tmp, tmp, topology_thread_cpumask(cpu));
	cpumask_andnot(tmp, cpu_online_mask, tmp);
	/* avoid HT siblings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&isolated_cpus_lock);
		free_cpumask_var(tmp);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}

	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&isolated_cpus_lock);
	free_cpumask_var(tmp);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}

static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 10; /* seconds */
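/*
 * Each power-saving thread runs at realtime priority (SCHED_RR), hops to
 * a lightly used CPU every round_robin_time seconds, and holds that CPU
 * in MWAIT idle for roughly (100 - idle_pct)% of each second.  The
 * remaining idle_pct% is spent sleeping so ordinary tasks are not
 * starved; see the comment above schedule_timeout_killable() below.
 */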
static int power_saving_thread(void *data)
{
	struct sched_param param = {.sched_priority = 1};
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	u64 last_jiffies = 0;

	sched_setscheduler(current, SCHED_RR, &param);

	while (!kthread_should_stop()) {
		int cpu;
		u64 expire_time;

		try_to_freeze();

		/* round robin to cpus */
		if (last_jiffies + round_robin_time * HZ < jiffies) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			if (tsc_detected_unstable && !tsc_marked_unstable) {
				/* TSC could halt in idle, so notify users */
				mark_tsc_unstable("TSC halts in idle");
				tsc_marked_unstable = 1;
			}
			if (lapic_detected_unstable && !lapic_marked_unstable) {
				int i;
				/* LAPIC could halt in idle, so notify users */
				for_each_online_cpu(i)
					clockevents_notify(
						CLOCK_EVT_NOTIFY_BROADCAST_ON,
						&i);
				lapic_marked_unstable = 1;
			}
			local_irq_disable();
			cpu = smp_processor_id();
			if (lapic_marked_unstable)
				clockevents_notify(
					CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
			stop_critical_timings();

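			/*
			 * Arm the address monitor on this thread's flag word
			 * and enter MWAIT: a remote CPU setting
			 * TIF_NEED_RESCHED writes that word and wakes us, and
			 * interrupts also break MWAIT (ECX bit 0 is set, and
			 * CPUID5_ECX_INTERRUPT_BREAK was verified at init).
			 * The need_resched() recheck after smp_mb() closes
			 * the race where the flag was already set before the
			 * monitor was armed.
			 */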
			__monitor((void *)&current_thread_info()->flags, 0, 0);
			smp_mb();
			if (!need_resched())
				__mwait(power_saving_mwait_eax, 1);

			start_critical_timings();
			if (lapic_marked_unstable)
				clockevents_notify(
					CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
			local_irq_enable();

			if (jiffies > expire_time) {
				do_sleep = 1;
				break;
			}
		}

		/*
		 * The RT scheduler throttles realtime tasks: when an RT task
		 * has used 95% of the CPU time, it is scheduled out for the
		 * remaining 5% so other tasks are not starved.  But that
		 * mechanism only works when every CPU is running an RT task;
		 * if one CPU has none, RT tasks on the other CPUs can borrow
		 * its time and exceed the 95% cap.  To keep the
		 * anti-starvation logic effective, take an explicit nap here.
		 */
		if (do_sleep)
			schedule_timeout_killable(HZ * idle_pct / 100);
	}

	exit_round_robin(tsk_index);
	return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;
static int create_power_saving_task(void)
{
	int rc = -ENOMEM;

	ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread,
		(void *)(unsigned long)ps_tsk_num,
		"power_saving/%d", ps_tsk_num);
	rc = IS_ERR(ps_tsks[ps_tsk_num]) ? PTR_ERR(ps_tsks[ps_tsk_num]) : 0;
	if (!rc)
		ps_tsk_num++;
	else
		ps_tsks[ps_tsk_num] = NULL;

	return rc;
}

static void destroy_power_saving_task(void)
{
	if (ps_tsk_num > 0) {
		ps_tsk_num--;
		kthread_stop(ps_tsks[ps_tsk_num]);
		ps_tsks[ps_tsk_num] = NULL;
	}
}

static void set_power_saving_task_num(unsigned int num)
{
	if (num > ps_tsk_num) {
		while (ps_tsk_num < num) {
			if (create_power_saving_task())
				return;
		}
	} else if (num < ps_tsk_num) {
		while (ps_tsk_num > num)
			destroy_power_saving_task();
	}
}

static void acpi_pad_idle_cpus(unsigned int num_cpus)
{
	get_online_cpus();

	num_cpus = min_t(unsigned int, num_cpus, num_online_cpus());
	set_power_saving_task_num(num_cpus);

	put_online_cpus();
}

static uint32_t acpi_pad_idle_cpus_num(void)
{
	return ps_tsk_num;
}

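/*
 * Three sysfs knobs are exposed on the ACPI device: idlecpus (write the
 * number of CPUs to idle, read back the mask of currently idled CPUs),
 * idlepct (percentage of each second an idled CPU naps) and rrtime
 * (round-robin interval in seconds).  For example (the exact path
 * depends on how the platform enumerates the device, so this is
 * illustrative only):
 *
 *   echo 2 > /sys/bus/acpi/devices/ACPI000C:00/idlecpus
 */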
static ssize_t acpi_pad_rrtime_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long num;
	if (strict_strtoul(buf, 0, &num))
		return -EINVAL;
	if (num < 1 || num >= 100)
		return -EINVAL;
	mutex_lock(&isolated_cpus_lock);
	round_robin_time = num;
	mutex_unlock(&isolated_cpus_lock);
	return count;
}

static ssize_t acpi_pad_rrtime_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", round_robin_time);
}
static DEVICE_ATTR(rrtime, S_IRUGO|S_IWUSR,
	acpi_pad_rrtime_show,
	acpi_pad_rrtime_store);

static ssize_t acpi_pad_idlepct_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long num;
	if (strict_strtoul(buf, 0, &num))
		return -EINVAL;
	if (num < 1 || num >= 100)
		return -EINVAL;
	mutex_lock(&isolated_cpus_lock);
	idle_pct = num;
	mutex_unlock(&isolated_cpus_lock);
	return count;
}

static ssize_t acpi_pad_idlepct_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", idle_pct);
}
static DEVICE_ATTR(idlepct, S_IRUGO|S_IWUSR,
	acpi_pad_idlepct_show,
	acpi_pad_idlepct_store);

static ssize_t acpi_pad_idlecpus_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long num;
	if (strict_strtoul(buf, 0, &num))
		return -EINVAL;
	mutex_lock(&isolated_cpus_lock);
	acpi_pad_idle_cpus(num);
	mutex_unlock(&isolated_cpus_lock);
	return count;
}

static ssize_t acpi_pad_idlecpus_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	int n = 0;
	n = cpumask_scnprintf(buf, PAGE_SIZE-2, to_cpumask(pad_busy_cpus_bits));
	buf[n++] = '\n';
	buf[n] = '\0';
	return n;
}
static DEVICE_ATTR(idlecpus, S_IRUGO|S_IWUSR,
	acpi_pad_idlecpus_show,
	acpi_pad_idlecpus_store);

static int acpi_pad_add_sysfs(struct acpi_device *device)
{
	int result;

	result = device_create_file(&device->dev, &dev_attr_idlecpus);
	if (result)
		return -ENODEV;
	result = device_create_file(&device->dev, &dev_attr_idlepct);
	if (result) {
		device_remove_file(&device->dev, &dev_attr_idlecpus);
		return -ENODEV;
	}
	result = device_create_file(&device->dev, &dev_attr_rrtime);
	if (result) {
		device_remove_file(&device->dev, &dev_attr_idlecpus);
		device_remove_file(&device->dev, &dev_attr_idlepct);
		return -ENODEV;
	}
	return 0;
}

static void acpi_pad_remove_sysfs(struct acpi_device *device)
{
	device_remove_file(&device->dev, &dev_attr_idlecpus);
	device_remove_file(&device->dev, &dev_attr_idlepct);
	device_remove_file(&device->dev, &dev_attr_rrtime);
}

/*
 * Query firmware (the ACPI _PUR method) for the number of CPUs that
 * should be idled; returns -1 on failure.  _PUR returns a two-element
 * package: { revision (1), number of processors to idle }.
 */
static int acpi_pad_pur(acpi_handle handle)
{
	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
	union acpi_object *package;
	int num = -1;

	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer)))
		return num;

	if (!buffer.length || !buffer.pointer)
		return num;

	package = buffer.pointer;

	if (package->type == ACPI_TYPE_PACKAGE &&
		package->package.count == 2 &&
		package->package.elements[0].integer.value == 1) /* rev 1 */
		num = package->package.elements[1].integer.value;

	kfree(buffer.pointer);
	return num;
}

/* Notify firmware (via the ACPI _OST method) how many CPUs are idle */
static void acpi_pad_ost(acpi_handle handle, int stat,
	uint32_t idle_cpus)
{
	union acpi_object params[3] = {
		{.type = ACPI_TYPE_INTEGER,},
		{.type = ACPI_TYPE_INTEGER,},
		{.type = ACPI_TYPE_BUFFER,},
	};
	struct acpi_object_list arg_list = {3, params};

	params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY;
	params[1].integer.value = stat;
	params[2].buffer.length = 4;
	params[2].buffer.pointer = (void *)&idle_cpus;
	acpi_evaluate_object(handle, "_OST", &arg_list, NULL);
}

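/*
 * Firmware raises Notify(ACPI000C, 0x80) when it wants the number of
 * idled CPUs to change.  The handler re-reads the target via _PUR,
 * adjusts the pool of power-saving threads, and reports the result back
 * through _OST.
 */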
static void acpi_pad_handle_notify(acpi_handle handle)
{
	int num_cpus;
	uint32_t idle_cpus;

	mutex_lock(&isolated_cpus_lock);
	num_cpus = acpi_pad_pur(handle);
	if (num_cpus < 0) {
		mutex_unlock(&isolated_cpus_lock);
		return;
	}
	acpi_pad_idle_cpus(num_cpus);
	idle_cpus = acpi_pad_idle_cpus_num();
	acpi_pad_ost(handle, 0, idle_cpus);
	mutex_unlock(&isolated_cpus_lock);
}

static void acpi_pad_notify(acpi_handle handle, u32 event,
	void *data)
{
	struct acpi_device *device = data;

	switch (event) {
	case ACPI_PROCESSOR_AGGREGATOR_NOTIFY:
		acpi_pad_handle_notify(handle);
		acpi_bus_generate_proc_event(device, event, 0);
		acpi_bus_generate_netlink_event(device->pnp.device_class,
			dev_name(&device->dev), event, 0);
		break;
	default:
		printk(KERN_WARNING "Unsupported event [0x%x]\n", event);
		break;
	}
}

static int acpi_pad_add(struct acpi_device *device)
{
	acpi_status status;

	strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME);
	strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS);

	if (acpi_pad_add_sysfs(device))
		return -ENODEV;

	status = acpi_install_notify_handler(device->handle,
		ACPI_DEVICE_NOTIFY, acpi_pad_notify, device);
	if (ACPI_FAILURE(status)) {
		acpi_pad_remove_sysfs(device);
		return -ENODEV;
	}

	return 0;
}

static int acpi_pad_remove(struct acpi_device *device,
	int type)
{
	mutex_lock(&isolated_cpus_lock);
	acpi_pad_idle_cpus(0);
	mutex_unlock(&isolated_cpus_lock);

	acpi_remove_notify_handler(device->handle,
		ACPI_DEVICE_NOTIFY, acpi_pad_notify);
	acpi_pad_remove_sysfs(device);
	return 0;
}

static const struct acpi_device_id pad_device_ids[] = {
	{"ACPI000C", 0},
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, pad_device_ids);

static struct acpi_driver acpi_pad_driver = {
	.name = "processor_aggregator",
	.class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
	.ids = pad_device_ids,
	.ops = {
		.add = acpi_pad_add,
		.remove = acpi_pad_remove,
	},
};

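/*
 * Refuse to load when MWAIT probing failed (power_saving_mwait_eax is
 * still 0): without a usable MWAIT hint the power-saving threads could
 * not actually put a CPU into a low-power state.
 */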
static int __init acpi_pad_init(void)
{
	power_saving_mwait_init();
	if (power_saving_mwait_eax == 0)
		return -EINVAL;

	return acpi_bus_register_driver(&acpi_pad_driver);
}

static void __exit acpi_pad_exit(void)
{
	acpi_bus_unregister_driver(&acpi_pad_driver);
}

module_init(acpi_pad_init);
module_exit(acpi_pad_exit);
MODULE_AUTHOR("Shaohua Li<[email protected]>");
MODULE_DESCRIPTION("ACPI Processor Aggregator Driver");
MODULE_LICENSE("GPL");