// SPDX-License-Identifier: GPL-2.0
/*
 * x86 CPU caches detection and configuration
 *
 * Previous changes
 * - Venkatesh Pallipadi: Cache identification through CPUID(0x4)
 * - Ashok Raj <[email protected]>: Work with CPU hotplug infrastructure
 * - Andi Kleen / Andreas Herrmann: CPUID(0x4) emulation on AMD
 */

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/stop_machine.h>

#include <asm/amd/nb.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/cpuid/api.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

#include "cpu.h"

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

static cpumask_var_t cpu_cacheinfo_mask;

/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;

enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type type :5;
		unsigned int level :3;
		unsigned int is_self_initializing :1;
		unsigned int is_fully_associative :1;
		unsigned int reserved :4;
		unsigned int num_threads_sharing :12;
		unsigned int num_cores_on_die :6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int coherency_line_size :12;
		unsigned int physical_line_partition :10;
		unsigned int ways_of_associativity :10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int number_of_sets :32;
	} split;
	u32 full;
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
};

/* Map CPUID(0x4) EAX.cache_type to <linux/cacheinfo.h> types */
static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

/*
 * Fallback AMD CPUID(0x4) emulation
 * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
 *
 * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should
 * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006).
 */

#define AMD_CPUID4_FULLY_ASSOCIATIVE 0xffff
#define AMD_L2_L3_INVALID_ASSOC 0x9

union l1_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :8;
		unsigned assoc :8;
		unsigned size_in_kb :8;
	};
	unsigned int val;
};

union l2_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :4;
		unsigned assoc :4;
		unsigned size_in_kb :16;
	};
	unsigned int val;
};

union l3_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :4;
		unsigned assoc :4;
		unsigned res :2;
		unsigned size_encoded :14;
	};
	unsigned int val;
};

/* L2/L3 associativity mapping */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = AMD_CPUID4_FULLY_ASSOCIATIVE
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

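/*
 * Synthesize a CPUID(0x4)-style leaf for cache @index from the legacy AMD
 * CPUID(0x80000005) (L1) and CPUID(0x80000006) (L2/L3) leaves.
 */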
static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax,
			      union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx)
{
	unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d, *l1;
	union l2_cache l2;
	union l3_cache l3;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	l1 = &l1d;
	switch (index) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;

		assoc = (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc;
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC)
			return;

		/* Use x86_cache_size as it might have K7 errata fixes */
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC)
			return;

		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[index];
	eax->split.level = levels[index];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = topology_num_cores_per_package();

	if (assoc == AMD_CPUID4_FULLY_ASSOCIATIVE)
		eax->split.is_fully_associative = 1;

	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
				    (ebx->split.ways_of_associativity + 1) - 1;
}

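/*
 * Common tail for the CPUID(0x4) fill paths: reject NULL leaves and derive
 * the total cache size in bytes from sets, line size, partitions and ways.
 */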
static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax,
				 union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx)
{
	if (eax.split.type == CTYPE_NULL)
		return -EIO;

	id4->eax = eax;
	id4->ebx = ebx;
	id4->ecx = ecx;
	id4->size = (ecx.split.number_of_sets + 1) *
		    (ebx.split.coherency_line_size + 1) *
		    (ebx.split.physical_line_partition + 1) *
		    (ebx.split.ways_of_associativity + 1);

	return 0;
}

static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored);
	else
		legacy_amd_cpuid4(index, &eax, &ebx, &ecx);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

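/*
 * Vendor dispatch: AMD/Hygon go through CPUID(0x8000001d) or the legacy
 * emulation above, everything else reads CPUID(0x4) directly.
 */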
static int fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	u8 cpu_vendor = boot_cpu_data.x86_vendor;

	return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ?
		amd_fill_cpuid4_info(index, id4) :
		intel_fill_cpuid4_info(index, id4);
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	/* Do a CPUID(op) loop to calculate num_cache_leaves */
	op = (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) ? 0x8000001d : 4;
	do {
		++i;
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

/*
 * AMD/Hygon CPUs may have multiple LLCs if L3 caches exist.
 */

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
	if (!cpuid_amd_hygon_has_l3_cache())
		return;

	if (c->x86 < 0x17) {
		/* Pre-Zen: LLC is at the node level */
		c->topo.llc_id = die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * Family 17h up to 1F models: LLC is at the core
		 * complex level. Core complex ID is ApicId[3].
		 */
		c->topo.llc_id = c->topo.apicid >> 3;
	} else {
		/*
		 * Newer families: LLC ID is calculated from the number
		 * of threads sharing the L3 cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int index_msb = get_count_order(num_sharing_cache);

			c->topo.llc_id = c->topo.apicid >> index_msb;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
	if (!cpuid_amd_hygon_has_l3_cache())
		return;

	/*
	 * Hygons are similar to AMD Family 17h up to 1F models: LLC is
	 * at the core complex level. Core complex ID is ApicId[3].
	 */
	c->topo.llc_id = c->topo.apicid >> 3;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	if (boot_cpu_has(X86_FEATURE_TOPOEXT))
		ci->num_leaves = find_num_cache_leaves(c);
	else if (c->extended_cpuid_level >= 0x80000006)
		ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	ci->num_leaves = find_num_cache_leaves(c);
}

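/*
 * Common tail for both Intel enumeration paths below: finalize llc_id and
 * x86_cache_size once the per-level sizes are known.
 */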
static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3,
				 unsigned int l2, unsigned int l1i, unsigned int l1d)
{
	/*
	 * If llc_id is still unset, then cpuid_level < 4, which implies
	 * that the only possibility left is SMT. Since CPUID(0x2) doesn't
	 * specify any shared caches and SMT shares all caches, we can
	 * unconditionally set LLC ID to the package ID so that all
	 * threads share it.
	 */
	if (c->topo.llc_id == BAD_APICID)
		c->topo.llc_id = c->topo.pkg_id;

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : l1i + l1d);

	if (!l2)
		cpu_detect_cache_sizes(c);
}

/*
 * Legacy Intel CPUID(0x2) path if CPUID(0x4) is not available.
 */
static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c)
{
	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	const struct leaf_0x2_table *desc;
	union leaf_0x2_regs regs;
	u8 *ptr;

	if (c->cpuid_level < 2)
		return;

	cpuid_leaf_0x2(&regs);
	for_each_cpuid_0x2_desc(regs, ptr, desc) {
		switch (desc->c_type) {
		case CACHE_L1_INST: l1i += desc->c_size; break;
		case CACHE_L1_DATA: l1d += desc->c_size; break;
		case CACHE_L2: l2 += desc->c_size; break;
		case CACHE_L3: l3 += desc->c_size; break;
		}
	}

	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
}

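/*
 * Strip the APIC ID bits that distinguish the threads sharing this cache,
 * yielding a topology ID common to all of its sharers.
 */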
static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
{
	unsigned int num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	return c->topo.apicid & ~((1 << index_msb) - 1);
}

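/*
 * CPUID(0x4) enumeration path: sum up the per-level cache sizes and derive
 * the L2/L3 topology IDs from the sharing information.
 */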
static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
	unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
	unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;

	if (c->cpuid_level < 4)
		return false;

	/*
	 * There should be at least one leaf. A non-zero value means
	 * that the number of leaves has been previously initialized.
	 */
	if (!ci->num_leaves)
		ci->num_leaves = find_num_cache_leaves(c);

	if (!ci->num_leaves)
		return false;

	for (int i = 0; i < ci->num_leaves; i++) {
		struct _cpuid4_info id4 = {};
		int ret;

		ret = intel_fill_cpuid4_info(i, &id4);
		if (ret < 0)
			continue;

		switch (id4.eax.split.level) {
		case 1:
			if (id4.eax.split.type == CTYPE_DATA)
				l1d = id4.size / 1024;
			else if (id4.eax.split.type == CTYPE_INST)
				l1i = id4.size / 1024;
			break;
		case 2:
			l2 = id4.size / 1024;
			l2_id = calc_cache_topo_id(c, &id4);
			break;
		case 3:
			l3 = id4.size / 1024;
			l3_id = calc_cache_topo_id(c, &id4);
			break;
		default:
			break;
		}
	}

	c->topo.l2c_id = l2_id;
	c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
	return true;
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Don't use CPUID(0x2) if CPUID(0x4) is supported. */
	if (intel_cacheinfo_0x4(c))
		return;

	intel_cacheinfo_0x2(c);
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, AMD/Hygon
 */
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *ci;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			ci = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling, &ci->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = id4->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).topo.apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).topo.apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			ci = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).topo.apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling, &ci->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, Intel + fallback AMD/Hygon
 */
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct cacheinfo *ci, *sibling_ci;
	unsigned long num_threads_sharing;
	int index_msb, i;

	if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, id4))
			return;
	}

	ci = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &ci->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			/* Skip if itself or no cacheinfo */
			if (i == cpu || !sib_cpu_ci->info_list)
				continue;

			sibling_ci = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &ci->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map);
		}
}

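/*
 * Translate a _cpuid4_info leaf into the generic <linux/cacheinfo.h>
 * representation.
 */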
static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4,
			 struct amd_northbridge *nb)
{
	ci->id = id4->id;
	ci->attributes = CACHE_ID;
	ci->level = id4->eax.split.level;
	ci->type = cache_type_map[id4->eax.split.type];
	ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1;
	ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1;
	ci->size = id4->size;
	ci->number_of_sets = id4->ecx.split.number_of_sets + 1;
	ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1;
	ci->priv = nb;
}

int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);

	/* There should be at least one leaf. */
	if (!ci->num_leaves)
		return -ENOENT;

	return 0;
}

/*
 * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info *id4)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4->id = c->topo.apicid >> index_msb;
}

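/*
 * Generic cacheinfo hook: fill one info_list entry per cache leaf of @cpu
 * and set up its shared_cpu_map.
 */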
int populate_cache_leaves(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *ci = this_cpu_ci->info_list;
	u8 cpu_vendor = boot_cpu_data.x86_vendor;
	struct amd_northbridge *nb = NULL;
	struct _cpuid4_info id4 = {};
	int idx, ret;

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = fill_cpuid4_info(idx, &id4);
		if (ret)
			return ret;

		get_cache_id(cpu, &id4);

		if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
			nb = amd_init_l3_cache(idx);

		ci_info_init(ci++, &id4, nb);
		__cache_cpumap_setup(cpu, idx, &id4);
	}

	this_cpu_ci->cpu_map_populated = true;
	return 0;
}

/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache, don't allow any interrupts;
 * they would run extremely slowly and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and
 * are reenabled after cache_enable() has been called.
 */
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);

/*
 * Cache flushing is the most time-consuming step when programming the
 * MTRRs. On many Intel CPUs without known errata, it can be skipped
 * if the CPU declares cache self-snooping support.
 */
static void maybe_flush_caches(void)
{
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();
}

void cache_disable(void) __acquires(cache_disable_lock)
{
	unsigned long cr0;

	/*
	 * This is not ideal since the cache is only flushed/disabled
	 * for this CPU while the MTRRs are changed, but changing this
	 * requires more invasive changes to the way the kernel boots.
	 */
	raw_spin_lock(&cache_disable_lock);

	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
	cr0 = read_cr0() | X86_CR0_CD;
	write_cr0(cr0);

	maybe_flush_caches();

	/* Save value of CR4 and clear Page Global Enable (bit 7) */
	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
		saved_cr4 = __read_cr4();
		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
	}

	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_disable();

	maybe_flush_caches();
}

void cache_enable(void) __releases(cache_disable_lock)
{
	/* Flush TLBs (no need to flush caches - they are disabled) */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_enable();

	/* Enable caches */
	write_cr0(read_cr0() & ~X86_CR0_CD);

	/* Restore value of CR4 */
	if (cpu_feature_enabled(X86_FEATURE_PGE))
		__write_cr4(saved_cr4);

	raw_spin_unlock(&cache_disable_lock);
}

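/*
 * Program the MTRRs and/or the PAT MSR on this CPU, with caches disabled
 * and local interrupts off.
 */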
static void cache_cpu_init(void)
{
	unsigned long flags;

	local_irq_save(flags);

	if (memory_caching_control & CACHE_MTRR) {
		cache_disable();
		mtrr_generic_set_state();
		cache_enable();
	}

	if (memory_caching_control & CACHE_PAT)
		pat_cpu_init();

	local_irq_restore(flags);
}

static bool cache_aps_delayed_init = true;

void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}

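/*
 * stop_machine() callback: initialize caching on CPUs that still need it,
 * i.e. during delayed AP init or on a CPU that is not yet online.
 */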
static int cache_rendezvous_handler(void *unused)
{
	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
		cache_cpu_init();

	return 0;
}

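/* Set up MTRR/PAT state on the boot CPU. */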
void __init cache_bp_init(void)
{
	mtrr_bp_init();
	pat_bp_init();

	if (memory_caching_control)
		cache_cpu_init();
}

void cache_bp_restore(void)
{
	if (memory_caching_control)
		cache_cpu_init();
}

static int cache_ap_online(unsigned int cpu)
{
	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);

	if (!memory_caching_control || get_cache_aps_delayed_init())
		return 0;

	/*
	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
	 * being changed, but this routine is called at CPU boot time and
	 * holding the lock would break it.
	 *
	 * This routine is called in two cases:
	 *
	 * 1. very early in software resume, when there absolutely are
	 *    no MTRR entry changes;
	 *
	 * 2. at CPU hotadd time. We let mtrr_add/del_page hold the
	 *    cpuhotplug lock to prevent MTRR entry changes.
	 */
	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
				       cpu_cacheinfo_mask);

	return 0;
}

static int cache_ap_offline(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
	return 0;
}

/*
 * Delayed cache initialization for all APs
 */
void cache_aps_init(void)
{
	if (!memory_caching_control || !get_cache_aps_delayed_init())
		return;

	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
	set_cache_aps_delayed_init(false);
}

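/* Allocate the tracking cpumask and register the CPU hotplug callbacks. */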
static int __init cache_ap_register(void)
{
	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);

	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
				  "x86/cachectrl:starting",
				  cache_ap_online, cache_ap_offline);
	return 0;
}
early_initcall(cache_ap_register);