// SPDX-License-Identifier: GPL-2.0
/*
 * x86 CPU caches detection and configuration
 *
 * Previous changes
 * - Venkatesh Pallipadi: Cache identification through CPUID(0x4)
 * - Ashok Raj <[email protected]>: Work with CPU hotplug infrastructure
 * - Andi Kleen / Andreas Herrmann: CPUID(0x4) emulation on AMD
 */

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/stop_machine.h>

#include <asm/amd/nb.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/cpuid/api.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

#include "cpu.h"

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

static cpumask_var_t cpu_cacheinfo_mask;

/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;

enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type type :5;
		unsigned int level :3;
		unsigned int is_self_initializing :1;
		unsigned int is_fully_associative :1;
		unsigned int reserved :4;
		unsigned int num_threads_sharing :12;
		unsigned int num_cores_on_die :6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int coherency_line_size :12;
		unsigned int physical_line_partition :10;
		unsigned int ways_of_associativity :10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int number_of_sets :32;
	} split;
	u32 full;
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
};

/* Map CPUID(0x4) EAX.cache_type to <linux/cacheinfo.h> types */
static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

/*
 * Fallback AMD CPUID(0x4) emulation
 * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
 *
 * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should
 * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006).
 */

#define AMD_CPUID4_FULLY_ASSOCIATIVE 0xffff
#define AMD_L2_L3_INVALID_ASSOC 0x9

union l1_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :8;
		unsigned assoc :8;
		unsigned size_in_kb :8;
	};
	unsigned int val;
};

union l2_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :4;
		unsigned assoc :4;
		unsigned size_in_kb :16;
	};
	unsigned int val;
};

union l3_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :4;
		unsigned assoc :4;
		unsigned res :2;
		unsigned size_encoded :14;
	};
	unsigned int val;
};

/* L2/L3 associativity mapping */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = AMD_CPUID4_FULLY_ASSOCIATIVE
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

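/*
 * Synthesize a CPUID(0x4)-style leaf for cache @index from the legacy AMD
 * CPUID(0x80000005) (L1) and CPUID(0x80000006) (L2/L3) leaves.
 */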
static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax,
			      union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx)
{
	unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d, *l1;
	union l2_cache l2;
	union l3_cache l3;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	l1 = &l1d;
	switch (index) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;

		assoc = (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc;
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC)
			return;

		/* Use x86_cache_size as it might have K7 errata fixes */
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC)
			return;

		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[index];
	eax->split.level = levels[index];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = topology_num_cores_per_package();

	if (assoc == AMD_CPUID4_FULLY_ASSOCIATIVE)
		eax->split.is_fully_associative = 1;

	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
				    (ebx->split.ways_of_associativity + 1) - 1;
}

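/*
 * Common tail for the CPUID(0x4) fill paths: reject NULL leaves and derive
 * the total cache size in bytes from sets, line size, partitions and ways.
 */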
static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax,
				 union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx)
{
	if (eax.split.type == CTYPE_NULL)
		return -EIO;

	id4->eax = eax;
	id4->ebx = ebx;
	id4->ecx = ecx;
	id4->size = (ecx.split.number_of_sets + 1) *
		    (ebx.split.coherency_line_size + 1) *
		    (ebx.split.physical_line_partition + 1) *
		    (ebx.split.ways_of_associativity + 1);

	return 0;
}

static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored);
	else
		legacy_amd_cpuid4(index, &eax, &ebx, &ecx);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

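/*
 * Vendor dispatch: AMD/Hygon go through CPUID(0x8000001d) or the legacy
 * emulation above, everything else reads CPUID(0x4) directly.
 */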
static int fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	u8 cpu_vendor = boot_cpu_data.x86_vendor;

	return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ?
		amd_fill_cpuid4_info(index, id4) :
		intel_fill_cpuid4_info(index, id4);
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	/* Do a CPUID(op) loop to calculate num_cache_leaves */
	op = (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) ? 0x8000001d : 4;
	do {
		++i;
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

/*
 * AMD/Hygon CPUs may have multiple LLCs if L3 caches exist.
 */

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
	if (!cpuid_amd_hygon_has_l3_cache())
		return;

	if (c->x86 < 0x17) {
		/* Pre-Zen: LLC is at the node level */
		c->topo.llc_id = die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * Family 17h up to 1F models: LLC is at the core
		 * complex level. Core complex ID is ApicId[3].
		 */
		c->topo.llc_id = c->topo.apicid >> 3;
	} else {
		/*
		 * Newer families: LLC ID is calculated from the number
		 * of threads sharing the L3 cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int index_msb = get_count_order(num_sharing_cache);

			c->topo.llc_id = c->topo.apicid >> index_msb;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
	if (!cpuid_amd_hygon_has_l3_cache())
		return;

	/*
	 * Hygons are similar to AMD Family 17h up to 1F models: LLC is
	 * at the core complex level. Core complex ID is ApicId[3].
	 */
	c->topo.llc_id = c->topo.apicid >> 3;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	if (boot_cpu_has(X86_FEATURE_TOPOEXT))
		ci->num_leaves = find_num_cache_leaves(c);
	else if (c->extended_cpuid_level >= 0x80000006)
		ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	ci->num_leaves = find_num_cache_leaves(c);
}

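/*
 * Common tail for both Intel enumeration paths below: finalize llc_id and
 * x86_cache_size once the per-level sizes are known.
 */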
static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3,
				 unsigned int l2, unsigned int l1i, unsigned int l1d)
{
	/*
	 * If llc_id is still unset, then cpuid_level < 4, which implies
	 * that the only possibility left is SMT. Since CPUID(0x2) doesn't
	 * specify any shared caches and SMT shares all caches, we can
	 * unconditionally set LLC ID to the package ID so that all
	 * threads share it.
	 */
	if (c->topo.llc_id == BAD_APICID)
		c->topo.llc_id = c->topo.pkg_id;

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : l1i + l1d);

	if (!l2)
		cpu_detect_cache_sizes(c);
}

/*
 * Legacy Intel CPUID(0x2) path if CPUID(0x4) is not available.
 */
static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c)
{
	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	const struct leaf_0x2_table *desc;
	union leaf_0x2_regs regs;
	u8 *ptr;

	if (c->cpuid_level < 2)
		return;

	cpuid_leaf_0x2(&regs);
	for_each_cpuid_0x2_desc(regs, ptr, desc) {
		switch (desc->c_type) {
		case CACHE_L1_INST: l1i += desc->c_size; break;
		case CACHE_L1_DATA: l1d += desc->c_size; break;
		case CACHE_L2: l2 += desc->c_size; break;
		case CACHE_L3: l3 += desc->c_size; break;
		}
	}

	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
}

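/*
 * Strip the APIC ID bits that distinguish the threads sharing this cache,
 * yielding a topology ID common to all of its sharers.
 */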
static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
{
	unsigned int num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	return c->topo.apicid & ~((1 << index_msb) - 1);
}

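/*
 * CPUID(0x4) enumeration path: sum up the per-level cache sizes and derive
 * the L2/L3 topology IDs from the sharing information.
 */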
static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
	unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
	unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;

	if (c->cpuid_level < 4)
		return false;

	/*
	 * There should be at least one leaf. A non-zero value means
	 * that the number of leaves has been previously initialized.
	 */
	if (!ci->num_leaves)
		ci->num_leaves = find_num_cache_leaves(c);

	if (!ci->num_leaves)
		return false;

	for (int i = 0; i < ci->num_leaves; i++) {
		struct _cpuid4_info id4 = {};
		int ret;

		ret = intel_fill_cpuid4_info(i, &id4);
		if (ret < 0)
			continue;

		switch (id4.eax.split.level) {
		case 1:
			if (id4.eax.split.type == CTYPE_DATA)
				l1d = id4.size / 1024;
			else if (id4.eax.split.type == CTYPE_INST)
				l1i = id4.size / 1024;
			break;
		case 2:
			l2 = id4.size / 1024;
			l2_id = calc_cache_topo_id(c, &id4);
			break;
		case 3:
			l3 = id4.size / 1024;
			l3_id = calc_cache_topo_id(c, &id4);
			break;
		default:
			break;
		}
	}

	c->topo.l2c_id = l2_id;
	c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
	return true;
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Don't use CPUID(0x2) if CPUID(0x4) is supported. */
	if (intel_cacheinfo_0x4(c))
		return;

	intel_cacheinfo_0x2(c);
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, AMD/Hygon
 */
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *ci;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			ci = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling, &ci->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = id4->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).topo.apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).topo.apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			ci = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).topo.apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling, &ci->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, Intel + fallback AMD/Hygon
 */
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct cacheinfo *ci, *sibling_ci;
	unsigned long num_threads_sharing;
	int index_msb, i;

	if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, id4))
			return;
	}

	ci = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &ci->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			/* Skip if itself or no cacheinfo */
			if (i == cpu || !sib_cpu_ci->info_list)
				continue;

			sibling_ci = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &ci->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map);
		}
}

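/*
 * Translate a _cpuid4_info leaf into the generic <linux/cacheinfo.h>
 * representation.
 */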
static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4,
			 struct amd_northbridge *nb)
{
	ci->id = id4->id;
	ci->attributes = CACHE_ID;
	ci->level = id4->eax.split.level;
	ci->type = cache_type_map[id4->eax.split.type];
	ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1;
	ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1;
	ci->size = id4->size;
	ci->number_of_sets = id4->ecx.split.number_of_sets + 1;
	ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1;
	ci->priv = nb;
}

int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);

	/* There should be at least one leaf. */
	if (!ci->num_leaves)
		return -ENOENT;

	return 0;
}

/*
 * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info *id4)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4->id = c->topo.apicid >> index_msb;
}

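/*
 * Generic cacheinfo hook: fill one info_list entry per cache leaf of @cpu
 * and set up its shared_cpu_map.
 */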
int populate_cache_leaves(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *ci = this_cpu_ci->info_list;
	u8 cpu_vendor = boot_cpu_data.x86_vendor;
	struct amd_northbridge *nb = NULL;
	struct _cpuid4_info id4 = {};
	int idx, ret;

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = fill_cpuid4_info(idx, &id4);
		if (ret)
			return ret;

		get_cache_id(cpu, &id4);

		if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
			nb = amd_init_l3_cache(idx);

		ci_info_init(ci++, &id4, nb);
		__cache_cpumap_setup(cpu, idx, &id4);
	}

	this_cpu_ci->cpu_map_populated = true;
	return 0;
}

/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache, don't allow any interrupts;
 * they would run extremely slowly and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and
 * are reenabled after cache_enable() has been called.
 */
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);

/*
 * Cache flushing is the most time-consuming step when programming the
 * MTRRs. On many Intel CPUs without known errata, it can be skipped
 * if the CPU declares cache self-snooping support.
 */
static void maybe_flush_caches(void)
{
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();
}

void cache_disable(void) __acquires(cache_disable_lock)
{
	unsigned long cr0;

	/*
	 * This is not ideal since the cache is only flushed/disabled
	 * for this CPU while the MTRRs are changed, but changing this
	 * requires more invasive changes to the way the kernel boots.
	 */
	raw_spin_lock(&cache_disable_lock);

	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
	cr0 = read_cr0() | X86_CR0_CD;
	write_cr0(cr0);

	maybe_flush_caches();

	/* Save value of CR4 and clear Page Global Enable (bit 7) */
	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
		saved_cr4 = __read_cr4();
		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
	}

	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_disable();

	maybe_flush_caches();
}

void cache_enable(void) __releases(cache_disable_lock)
{
	/* Flush TLBs (no need to flush caches - they are disabled) */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_enable();

	/* Enable caches */
	write_cr0(read_cr0() & ~X86_CR0_CD);

	/* Restore value of CR4 */
	if (cpu_feature_enabled(X86_FEATURE_PGE))
		__write_cr4(saved_cr4);

	raw_spin_unlock(&cache_disable_lock);
}

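/*
 * Program the MTRRs and/or the PAT MSR on this CPU, with caches disabled
 * and local interrupts off.
 */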
static void cache_cpu_init(void)
{
	unsigned long flags;

	local_irq_save(flags);

	if (memory_caching_control & CACHE_MTRR) {
		cache_disable();
		mtrr_generic_set_state();
		cache_enable();
	}

	if (memory_caching_control & CACHE_PAT)
		pat_cpu_init();

	local_irq_restore(flags);
}

static bool cache_aps_delayed_init = true;

void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}

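/*
 * stop_machine() callback: initialize caching on CPUs that still need it,
 * i.e. during delayed AP init or on a CPU that is not yet online.
 */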
static int cache_rendezvous_handler(void *unused)
{
	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
		cache_cpu_init();

	return 0;
}

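/* Set up MTRR/PAT state on the boot CPU. */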
void __init cache_bp_init(void)
{
	mtrr_bp_init();
	pat_bp_init();

	if (memory_caching_control)
		cache_cpu_init();
}

void cache_bp_restore(void)
{
	if (memory_caching_control)
		cache_cpu_init();
}

static int cache_ap_online(unsigned int cpu)
{
	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);

	if (!memory_caching_control || get_cache_aps_delayed_init())
		return 0;

	/*
	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
	 * being changed, but this routine is called at CPU boot time and
	 * holding the lock would break it.
	 *
	 * This routine is called in two cases:
	 *
	 * 1. very early in software resume, when there absolutely are
	 *    no MTRR entry changes;
	 *
	 * 2. at CPU hotadd time. We let mtrr_add/del_page hold the
	 *    cpuhotplug lock to prevent MTRR entry changes.
	 */
	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
				       cpu_cacheinfo_mask);

	return 0;
}

static int cache_ap_offline(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
	return 0;
}

/*
 * Delayed cache initialization for all APs
 */
void cache_aps_init(void)
{
	if (!memory_caching_control || !get_cache_aps_delayed_init())
		return;

	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
	set_cache_aps_delayed_init(false);
}

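/* Allocate the tracking cpumask and register the CPU hotplug callbacks. */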
static int __init cache_ap_register(void)
{
	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);

	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
				  "x86/cachectrl:starting",
				  cache_ap_online, cache_ap_offline);
	return 0;
}
early_initcall(cache_ap_register);