GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/kernel/cpu/mcheck/therm_throt.c
/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin ([email protected])
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/sysdev.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>

/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL		(300 * HZ)

#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1

/*
 * Current thermal event state:
 */
struct _thermal_state {
	bool new_event;
	int event;
	u64 next_check;
	unsigned long count;
	unsigned long last_count;
};

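/* Per-CPU bookkeeping: one _thermal_state per event type and level. */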
struct thermal_state {
	struct _thermal_state core_throttle;
	struct _thermal_state core_power_limit;
	struct _thermal_state package_throttle;
	struct _thermal_state package_power_limit;
	struct _thermal_state core_thresh0;
	struct _thermal_state core_thresh1;
};

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en = ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
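/* Helpers that generate read-only sysdev attributes exposing the per-CPU event counters. */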
#define define_therm_throt_sysdev_one_ro(_name) \
	static SYSDEV_ATTR(_name, 0444, \
			   therm_throt_sysdev_show_##_name, \
			   NULL) \

#define define_therm_throt_sysdev_show_func(event, name) \
	\
static ssize_t therm_throt_sysdev_show_##event##_##name( \
			struct sys_device *dev, \
			struct sysdev_attribute *attr, \
			char *buf) \
{ \
	unsigned int cpu = dev->id; \
	ssize_t ret; \
	\
	preempt_disable();	/* CPU hotplug */ \
	if (cpu_online(cpu)) { \
		ret = sprintf(buf, "%lu\n", \
			      per_cpu(thermal_state, cpu).event.name); \
	} else \
		ret = 0; \
	preempt_enable(); \
	\
	return ret; \
}

define_therm_throt_sysdev_show_func(core_throttle, count);
define_therm_throt_sysdev_one_ro(core_throttle_count);

define_therm_throt_sysdev_show_func(core_power_limit, count);
define_therm_throt_sysdev_one_ro(core_power_limit_count);

define_therm_throt_sysdev_show_func(package_throttle, count);
define_therm_throt_sysdev_one_ro(package_throttle_count);

define_therm_throt_sysdev_show_func(package_power_limit, count);
define_therm_throt_sysdev_one_ro(package_power_limit_count);

static struct attribute *thermal_throttle_attrs[] = {
	&attr_core_throttle_count.attr,
	NULL
};

static struct attribute_group thermal_attr_group = {
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

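/* A thermal event is reported either for an individual core or for the whole package. */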
#define CORE_LEVEL	0
#define PACKAGE_LEVEL	1

/**
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event:	Whether the condition is currently active (boolean), since the
 *		thermal interrupt normally gets called both when the thermal
 *		event begins and once the event has ended.
 * @event:	THERMAL_THROTTLING_EVENT or POWER_LIMIT_EVENT.
 * @level:	CORE_LEVEL or PACKAGE_LEVEL.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from the previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
static int therm_throt_process(bool new_event, int event, int level)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	bool old_event;
	u64 now;
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

	now = get_jiffies_64();
	if (level == CORE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->core_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->core_power_limit;
		else
			return 0;
	} else if (level == PACKAGE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->package_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->package_power_limit;
		else
			return 0;
	} else
		return 0;

	old_event = state->new_event;
	state->new_event = new_event;

	if (new_event)
		state->count++;

	if (time_before64(now, state->next_check) &&
			state->count != state->last_count)
		return 0;

	state->next_check = now + CHECK_INTERVAL;
	state->last_count = state->count;

	/* if we just entered the thermal event */
	if (new_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
		else
			printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
		return 1;
	}
	if (old_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
		else
			printk(KERN_INFO "CPU%d: %s power limit normal\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
		return 1;
	}

	return 0;
}

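/* Rate-limit threshold notifications to at most one per CHECK_INTERVAL per threshold. */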
static int thresh_event_valid(int event)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
	u64 now = get_jiffies_64();

	state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;

	if (time_before64(now, state->next_check))
		return 0;

	state->next_check = now + CHECK_INTERVAL;
	return 1;
}

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
					      unsigned int cpu)
{
	int err;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
	if (err)
		return err;

	if (cpu_has(c, X86_FEATURE_PLN))
		err = sysfs_add_file_to_group(&sys_dev->kobj,
					      &attr_core_power_limit_count.attr,
					      thermal_attr_group.name);
	if (cpu_has(c, X86_FEATURE_PTS)) {
		err = sysfs_add_file_to_group(&sys_dev->kobj,
					      &attr_package_throttle_count.attr,
					      thermal_attr_group.name);
		if (cpu_has(c, X86_FEATURE_PLN))
			err = sysfs_add_file_to_group(&sys_dev->kobj,
					&attr_package_power_limit_count.attr,
					thermal_attr_group.name);
	}

	return err;
}

static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
{
	sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group);
}

/* Mutex protecting device creation against CPU hotplug: */
static DEFINE_MUTEX(therm_cpu_lock);

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static __cpuinit int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;
	int err = 0;

	sys_dev = get_cpu_sysdev(cpu);

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		mutex_lock(&therm_cpu_lock);
		err = thermal_throttle_add_dev(sys_dev, cpu);
		mutex_unlock(&therm_cpu_lock);
		WARN_ON(err);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		mutex_lock(&therm_cpu_lock);
		thermal_throttle_remove_dev(sys_dev);
		mutex_unlock(&therm_cpu_lock);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
{
	.notifier_call = thermal_throttle_cpu_callback,
};

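/* Register the CPU hotplug notifier and hook up sysfs for the CPUs already online. */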
static __init int thermal_throttle_init_device(void)
{
	unsigned int cpu = 0;
	int err;

	if (!atomic_read(&therm_throt_en))
		return 0;

	register_hotcpu_notifier(&thermal_throttle_cpu_notifier);

#ifdef CONFIG_HOTPLUG_CPU
	mutex_lock(&therm_cpu_lock);
#endif
	/* connect live CPUs to sysfs */
	for_each_online_cpu(cpu) {
		err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu);
		WARN_ON(err);
	}
#ifdef CONFIG_HOTPLUG_CPU
	mutex_unlock(&therm_cpu_lock);
#endif

	return 0;
}
device_initcall(thermal_throttle_init_device);

#endif /* CONFIG_SYSFS */

/*
 * Set the two most significant bits to tell the mce log which thermal
 * event type this is.
 * This is a temporary solution and may change with future mce log
 * infrastructure.
 */
#define CORE_THROTTLED		(0)
#define CORE_POWER_LIMIT	((__u64)1 << 62)
#define PACKAGE_THROTTLED	((__u64)2 << 62)
#define PACKAGE_POWER_LIMIT	((__u64)3 << 62)

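/* Forward programmable-threshold events to the platform driver, subject to rate limiting. */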
static void notify_thresholds(__u64 msr_val)
{
	/* check whether the interrupt handler is defined;
	 * otherwise simply return
	 */
	if (!platform_thermal_notify)
		return;

	/* lower threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
		platform_thermal_notify(msr_val);
	/* higher threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
		platform_thermal_notify(msr_val);
}

/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
	__u64 msr_val;

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

	/* Check for violation of core thermal thresholds */
	notify_thresholds(msr_val);

	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
				THERMAL_THROTTLING_EVENT,
				CORE_LEVEL) != 0)
		mce_log_therm_throt_event(CORE_THROTTLED | msr_val);

	if (this_cpu_has(X86_FEATURE_PLN))
		if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					CORE_LEVEL) != 0)
			mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);

	if (this_cpu_has(X86_FEATURE_PTS)) {
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
		if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
					THERMAL_THROTTLING_EVENT,
					PACKAGE_LEVEL) != 0)
			mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
		if (this_cpu_has(X86_FEATURE_PLN))
			if (therm_throt_process(msr_val &
					PACKAGE_THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					PACKAGE_LEVEL) != 0)
				mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
							  | msr_val);
	}
}

static void unexpected_thermal_interrupt(void)
{
	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
	       smp_processor_id());
}

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

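/* Entry point for the thermal LVT interrupt; dispatches through smp_thermal_vector. */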
asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
	exit_idle();
	irq_enter();
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
	irq_exit();
	/* Ack only at the end to avoid potential reentry */
	ack_APIC_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
	if (!cpu_has_apic)
		return 0;
	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
		return 0;
	return 1;
}
418
void __init mcheck_intel_therm_init(void)
419
{
420
/*
421
* This function is only called on boot CPU. Save the init thermal
422
* LVT value on BSP and use that value to restore APs' thermal LVT
423
* entry BIOS programmed later
424
*/
425
if (intel_thermal_supported(&boot_cpu_data))
426
lvtthmr_init = apic_read(APIC_LVTTHMR);
427
}
428
429
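/* Per-CPU setup: program the thermal LVT and enable thermal interrupt sources, unless the BIOS/SMI already owns them. */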
void intel_init_thermal(struct cpuinfo_x86 *c)
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

	if (!intel_thermal_supported(c))
		return;

	/*
	 * First check if it's enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);

	h = lvtthmr_init;
	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by the BSP issuing the
	 * INIT-SIPI-SIPI sequence to them and LVT registers are reset to 0s,
	 * except for the mask bits which are set to 1s when APs receive the
	 * INIT IPI. If the BIOS takes over the thermal interrupt and sets its
	 * delivery mode to SMI (not fixed), this restores the value the BIOS
	 * has programmed on the AP, based on the BSP's info that we saved,
	 * since the BIOS always sets the same value for all threads/cores.
	 */
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);

	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	/* Check whether a vector already exists */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
		       cpu, (h & APIC_VECTOR_MASK));
		return;
	}

	/* early Pentium M models use a different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	if (cpu_has(c, X86_FEATURE_PLN))
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
	else
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

	if (cpu_has(c, X86_FEATURE_PTS)) {
		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
		if (cpu_has(c, X86_FEATURE_PLN))
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE
				| PACKAGE_THERM_INT_PLN_ENABLE), h);
		else
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
	}

	smp_thermal_vector = intel_thermal_interrupt;

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

	printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
		    tm2 ? "TM2" : "TM1");

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}