GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/kernel/cpu/intel_cacheinfo.c
/*
 *	Routines to identify caches on Intel CPU.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <[email protected]>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)
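/* sizes in the table below are in KB; MB() just converts megabytes to KB */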

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table __cpuinitconst cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2, 256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2, 256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2, 128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2, MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2, MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2, 512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3, MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3, MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3, MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3, MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3, MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3, MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3, MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3, MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3, MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3, MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3, MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3, MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type {
	CACHE_TYPE_NULL = 0,
	CACHE_TYPE_DATA = 1,
	CACHE_TYPE_INST = 2,
	CACHE_TYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};
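/*
 * The unions above mirror the EAX, EBX and ECX output of CPUID leaf 4, so
 * each register can be used either as a raw 32-bit value or through its
 * decoded bit fields.
 */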

struct amd_l3_cache {
	struct amd_northbridge *nb;
	unsigned indices;
	u8 subcaches[4];
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_l3_cache *l3;
	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};

/* subset of above _cpuid4_info w/o shared_cpu_map */
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_l3_cache *l3;
};

unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short __cpuinitconst assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };

static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
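		/* fall through: L1I shares the L1 decode below with L1D */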
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;


	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

struct _cache_attr {
	struct attribute attr;
	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
			 unsigned int);
};

#ifdef CONFIG_AMD_NB

/*
 * L3 cache descriptors
 */
static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
{
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(l3->nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));
	l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

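	/*
	 * Each subcache count contributes 1024 disable indices; keep the
	 * highest index that may be disabled (largest count << 10, minus 1).
	 */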
	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
					int index)
{
	static struct amd_l3_cache *__cpuinitdata l3_caches;
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3 || amd_nb_num() == 0)
		return;

	/*
	 * Strictly speaking, the amount in @size below is leaked since it is
	 * never freed but this is done only on shutdown so it doesn't matter.
	 */
	if (!l3_caches) {
		int size = amd_nb_num() * sizeof(struct amd_l3_cache);

		l3_caches = kzalloc(size, GFP_ATOMIC);
		if (!l3_caches)
			return;
	}

	node = amd_get_nb_id(smp_processor_id());

	if (!l3_caches[node].nb) {
		l3_caches[node].nb = node_to_amd_nb(node);
		amd_calc_l3_indices(&l3_caches[node]);
	}

	this_leaf->l3 = &l3_caches[node];
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
{
	unsigned int reg = 0;

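	/* slot 0 lives at offset 0x1BC of the NB misc device, slot 1 at 0x1C0 */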
	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;

	if (!this_leaf->l3 ||
	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return -EINVAL;

	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
			  unsigned int cpu)				\
{									\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!l3->subcaches[i])
			continue;

		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache which indices we disable therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable a L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
			    unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(l3, slot);
	if (ret >= 0)
		return -EINVAL;

	if (index > l3->indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(l3, !slot))
		return -EINVAL;

	amd_l3_disable_index(l3, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!this_leaf->l3 ||
	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return -EINVAL;

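	/* pick a CPU on the node containing this L3 for the wbinvd during the update */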
	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));

	if (strict_strtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			printk(KERN_WARNING "L3 disable slot %d in use!\n",
					    slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
			   const char *buf, size_t count,		\
			   unsigned int cpu)				\
{									\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
		show_cache_disable_0, store_cache_disable_0);
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
		show_cache_disable_1, store_cache_disable_1);

static ssize_t
show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
{
	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return -EINVAL;

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t
store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
		unsigned int cpu)
{
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return -EINVAL;

	if (strict_strtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static struct _cache_attr subcaches =
	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);

#else	/* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */

static int
__cpuinit cpuid4_cache_lookup_regs(int index,
				   struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CACHE_TYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
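	/* total size = sets * line size * physical line partitions * ways */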
	this_leaf->size = (ecx.split.number_of_sets + 1) *
			  (ebx.split.coherency_line_size + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity + 1);
	return 0;
}

static int __cpuinit find_num_cache_leaves(void)
{
	unsigned int		eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	do {
		++i;
		/* Do cpuid(4) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);
	return i;
}

unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves();
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf;
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval >= 0) {
				switch (this_leaf.eax.split.level) {
				case 1:
					if (this_leaf.eax.split.type ==
							CACHE_TYPE_DATA)
						new_l1d = this_leaf.size/1024;
					else if (this_leaf.eax.split.type ==
							CACHE_TYPE_INST)
						new_l1i = this_leaf.size/1024;
					break;
				case 2:
					new_l2 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l2_id = c->apicid >> index_msb;
					break;
				case 3:
					new_l3 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(
							num_threads_sharing);
					l3_id = c->apicid >> index_msb;
					break;
				default:
					break;
				}
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
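		/* view the four CPUID registers as 16 one-byte descriptors */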
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

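	/* report L3 if present, else L2, else the combined L1 sizes */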
	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}

#ifdef CONFIG_SYSFS

/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))

#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
	struct _cpuid4_info *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i, sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

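	/* on AMD, index 3 is the node-wide L3; use the LLC sharing mask */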
	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			if (!per_cpu(ici_cpuid4_info, i))
				continue;
			this_leaf = CPUID4_INFO_IDX(i, index);
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				set_bit(sibling, this_leaf->shared_cpu_map);
			}
		}
		return;
	}
	this_leaf = CPUID4_INFO_IDX(cpu, index);
	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

	if (num_threads_sharing == 1)
		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
	else {
		index_msb = get_count_order(num_threads_sharing);

		for_each_online_cpu(i) {
			if (cpu_data(i).apicid >> index_msb ==
			    c->apicid >> index_msb) {
				cpumask_set_cpu(i,
					to_cpumask(this_leaf->shared_cpu_map));
				if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
					sibling_leaf =
						CPUID4_INFO_IDX(i, index);
					cpumask_set_cpu(cpu, to_cpumask(
						sibling_leaf->shared_cpu_map));
				}
			}
		}
	}
}
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
	struct _cpuid4_info *this_leaf, *sibling_leaf;
	int sibling;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
		cpumask_clear_cpu(cpu,
				  to_cpumask(sibling_leaf->shared_cpu_map));
	}
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif

static void __cpuinit free_cache_attributes(unsigned int cpu)
{
	int i;

	for (i = 0; i < num_cache_leaves; i++)
		cache_remove_shared_cpu_map(cpu, i);

	kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
	kfree(per_cpu(ici_cpuid4_info, cpu));
	per_cpu(ici_cpuid4_info, cpu) = NULL;
}

static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
	struct _cpuid4_info_regs *leaf_regs =
		(struct _cpuid4_info_regs *)this_leaf;

	return cpuid4_cache_lookup_regs(index, leaf_regs);
}

static void __cpuinit get_cpu_leaves(void *_retval)
{
	int j, *retval = _retval, cpu = smp_processor_id();

	/* Do cpuid and store the results */
	for (j = 0; j < num_cache_leaves; j++) {
		struct _cpuid4_info *this_leaf;
		this_leaf = CPUID4_INFO_IDX(cpu, j);
		*retval = cpuid4_cache_lookup(j, this_leaf);
		if (unlikely(*retval < 0)) {
			int i;

			for (i = 0; i < j; i++)
				cache_remove_shared_cpu_map(cpu, i);
			break;
		}
		cache_shared_cpu_map_setup(cpu, j);
	}
}

static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
	int retval;

	if (num_cache_leaves == 0)
		return -ENOENT;

	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return -ENOMEM;

	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
	if (retval) {
		kfree(per_cpu(ici_cpuid4_info, cpu));
		per_cpu(ici_cpuid4_info, cpu) = NULL;
	}

	return retval;
}

#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */

/* pointer to kobject for cpuX/cache */
static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);

struct _index_kobject {
	struct kobject kobj;
	unsigned int cpu;
	unsigned short index;
};

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))

#define show_one_plus(file_name, object, val)				\
static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
				unsigned int cpu)			\
{									\
	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}

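/*
 * CPUID leaf 4 reports line size, partitions, ways and sets as (value - 1),
 * so the accessors below add 1 back; the cache level is reported as-is.
 */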
show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);

static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
			 unsigned int cpu)
{
	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}

static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
					int type, char *buf)
{
	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
	int n = 0;

	if (len > 1) {
		const struct cpumask *mask;

		mask = to_cpumask(this_leaf->shared_cpu_map);
		n = type ?
			cpulist_scnprintf(buf, len-2, mask) :
			cpumask_scnprintf(buf, len-2, mask);
		buf[n++] = '\n';
		buf[n] = '\0';
	}
	return n;
}

static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
					  unsigned int cpu)
{
	return show_shared_cpu_map_func(leaf, 0, buf);
}

static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
					   unsigned int cpu)
{
	return show_shared_cpu_map_func(leaf, 1, buf);
}

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
			 unsigned int cpu)
{
	switch (this_leaf->eax.split.type) {
	case CACHE_TYPE_DATA:
		return sprintf(buf, "Data\n");
	case CACHE_TYPE_INST:
		return sprintf(buf, "Instruction\n");
	case CACHE_TYPE_UNIFIED:
		return sprintf(buf, "Unified\n");
	default:
		return sprintf(buf, "Unknown\n");
	}
}

#define to_object(k)	container_of(k, struct _index_kobject, kobj)
#define to_attr(a)	container_of(a, struct _cache_attr, attr)

#define define_one_ro(_name) \
static struct _cache_attr _name = \
	__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

static struct attribute *default_attrs[] = {
	&type.attr,
	&level.attr,
	&coherency_line_size.attr,
	&physical_line_partition.attr,
	&ways_of_associativity.attr,
	&number_of_sets.attr,
	&size.attr,
	&shared_cpu_map.attr,
	&shared_cpu_list.attr,
	NULL
};

#ifdef CONFIG_AMD_NB
static struct attribute ** __cpuinit amd_l3_attrs(void)
{
	static struct attribute **attrs;
	int n;

	if (attrs)
		return attrs;

	n = sizeof (default_attrs) / sizeof (struct attribute *);

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;

	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
	if (attrs == NULL)
		return attrs = default_attrs;

	for (n = 0; default_attrs[n]; n++)
		attrs[n] = default_attrs[n];

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		attrs[n++] = &cache_disable_0.attr;
		attrs[n++] = &cache_disable_1.attr;
	}

	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		attrs[n++] = &subcaches.attr;

	return attrs;
}
#endif

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->show ?
		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, this_leaf->cpu) :
		0;
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->store ?
		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, count, this_leaf->cpu) :
		0;
	return ret;
}

static const struct sysfs_ops sysfs_ops = {
	.show   = show,
	.store  = store,
};

static struct kobj_type ktype_cache = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
};

static struct kobj_type ktype_percpu_entry = {
	.sysfs_ops	= &sysfs_ops,
};

static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
	kfree(per_cpu(ici_cache_kobject, cpu));
	kfree(per_cpu(ici_index_kobject, cpu));
	per_cpu(ici_cache_kobject, cpu) = NULL;
	per_cpu(ici_index_kobject, cpu) = NULL;
	free_cache_attributes(cpu);
}

static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
	int err;

	if (num_cache_leaves == 0)
		return -ENOENT;

	err = detect_cache_attributes(cpu);
	if (err)
		return err;

	/* Allocate all required memory */
	per_cpu(ici_cache_kobject, cpu) =
		kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
		goto err_out;

	per_cpu(ici_index_kobject, cpu) = kzalloc(
	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
		goto err_out;

	return 0;

err_out:
	cpuid4_cache_sysfs_exit(cpu);
	return -ENOMEM;
}

static DECLARE_BITMAP(cache_dev_map, NR_CPUS);

/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct _index_kobject *this_object;
	struct _cpuid4_info *this_leaf;
	int retval;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
				      &ktype_percpu_entry,
				      &sys_dev->kobj, "%s", "cache");
	if (retval < 0) {
		cpuid4_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu, i);
		this_object->cpu = cpu;
		this_object->index = i;

		this_leaf = CPUID4_INFO_IDX(cpu, i);

		ktype_cache.default_attrs = default_attrs;
#ifdef CONFIG_AMD_NB
		if (this_leaf->l3)
			ktype_cache.default_attrs = amd_l3_attrs();
#endif
		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(ici_cache_kobject, cpu),
					      "index%1lu", i);
		if (unlikely(retval)) {
			for (j = 0; j < i; j++)
				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
			kobject_put(per_cpu(ici_cache_kobject, cpu));
			cpuid4_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));

	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
	return 0;
}

static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i;

	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return;
	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
		return;
	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));

	for (i = 0; i < num_cache_leaves; i++)
		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
	kobject_put(per_cpu(ici_cache_kobject, cpu));
	cpuid4_cache_sysfs_exit(cpu);
}

static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
					    unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		cache_add_dev(sys_dev);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		cache_remove_dev(sys_dev);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
	.notifier_call = cacheinfo_cpu_callback,
};

static int __cpuinit cache_sysfs_init(void)
{
	int i;

	if (num_cache_leaves == 0)
		return 0;

	for_each_online_cpu(i) {
		int err;
		struct sys_device *sys_dev = get_cpu_sysdev(i);

		err = cache_add_dev(sys_dev);
		if (err)
			return err;
	}
	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
	return 0;
}

device_initcall(cache_sysfs_init);

#endif