Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/powerpc/powernv/platform_powernv.c
39507 views
1
/*-
2
* Copyright (c) 2015 Nathan Whitehorn
3
* Copyright (c) 2017-2018 Semihalf
4
* All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
*
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#include <sys/param.h>
29
#include <sys/systm.h>
30
#include <sys/kernel.h>
31
#include <sys/bus.h>
32
#include <sys/pcpu.h>
33
#include <sys/proc.h>
34
#include <sys/smp.h>
35
#include <vm/vm.h>
36
#include <vm/pmap.h>
37
38
#include <machine/bus.h>
39
#include <machine/cpu.h>
40
#include <machine/hid.h>
41
#include <machine/platformvar.h>
42
#include <machine/pmap.h>
43
#include <machine/rtas.h>
44
#include <machine/smp.h>
45
#include <machine/spr.h>
46
#include <machine/trap.h>
47
48
#include <dev/ofw/openfirm.h>
49
#include <dev/ofw/ofw_bus.h>
50
#include <dev/ofw/ofw_bus_subr.h>
51
#include <machine/ofw_machdep.h>
52
#include <powerpc/aim/mmu_oea64.h>
53
54
#include "platform_if.h"
55
#include "opal.h"
56
57
#ifdef SMP
/* Set by MI SMP code: pcpu pointer consumed by the next AP to start. */
extern void *ap_pcpu;
#endif

/*
 * Optional hook run on each AP from powernv_smp_ap_init(); other code
 * (e.g. interrupt controller setup) may install extra per-AP work here.
 */
void (*powernv_smp_ap_extra_init)(void);

/* PLATFORM interface method implementations. */
static int powernv_probe(platform_t);
static int powernv_attach(platform_t);
void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz);
static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);
static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);
static void powernv_smp_ap_init(platform_t);
#ifdef SMP
static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);
static void powernv_smp_probe_threads(platform_t);
static struct cpu_group *powernv_smp_topo(platform_t plat);
#endif
static void powernv_reset(platform_t);
static void powernv_cpu_idle(sbintime_t sbt);
static int powernv_cpuref_init(void);
static int powernv_node_numa_domain(platform_t platform, phandle_t node);

/* Dispatch table binding the platform KPI to the implementations above. */
static platform_method_t powernv_methods[] = {
	PLATFORMMETHOD(platform_probe, powernv_probe),
	PLATFORMMETHOD(platform_attach, powernv_attach),
	PLATFORMMETHOD(platform_mem_regions, powernv_mem_regions),
	PLATFORMMETHOD(platform_numa_mem_regions, powernv_numa_mem_regions),
	PLATFORMMETHOD(platform_timebase_freq, powernv_timebase_freq),

	PLATFORMMETHOD(platform_smp_ap_init, powernv_smp_ap_init),
	PLATFORMMETHOD(platform_smp_first_cpu, powernv_smp_first_cpu),
	PLATFORMMETHOD(platform_smp_next_cpu, powernv_smp_next_cpu),
	PLATFORMMETHOD(platform_smp_get_bsp, powernv_smp_get_bsp),
#ifdef SMP
	PLATFORMMETHOD(platform_smp_start_cpu, powernv_smp_start_cpu),
	PLATFORMMETHOD(platform_smp_probe_threads, powernv_smp_probe_threads),
	PLATFORMMETHOD(platform_smp_topo, powernv_smp_topo),
#endif
	PLATFORMMETHOD(platform_node_numa_domain, powernv_node_numa_domain),

	PLATFORMMETHOD(platform_reset, powernv_reset),
	{ 0, 0 }
};

static platform_def_t powernv_platform = {
	"powernv",
	powernv_methods,
	0
};

/* CPU enumeration cache, built once by powernv_cpuref_init(). */
static struct cpuref platform_cpuref[MAXCPU];
static int platform_cpuref_cnt;		/* number of valid entries above */
static int platform_cpuref_valid;	/* nonzero once the cache is built */
/* Index into "ibm,associativity" used to pick the NUMA domain cell. */
static int platform_associativity;

PLATFORM_DEF(powernv_platform);

/* PIR (hardware thread ID) of the CPU we booted on; identifies the BSP. */
static uint64_t powernv_boot_pir;
119
120
static int
121
powernv_probe(platform_t plat)
122
{
123
if (opal_check() == 0)
124
return (BUS_PROBE_SPECIFIC);
125
126
return (ENXIO);
127
}
128
129
static int
130
powernv_attach(platform_t plat)
131
{
132
uint32_t nptlp, shift = 0, slb_encoding = 0;
133
int32_t lp_size, lp_encoding;
134
char buf[255];
135
pcell_t refpoints[3];
136
pcell_t prop;
137
phandle_t cpu;
138
phandle_t opal;
139
int res, len, idx;
140
register_t msr;
141
register_t fscr;
142
bool has_lp;
143
144
/* Ping OPAL again just to make sure */
145
opal_check();
146
147
#if BYTE_ORDER == LITTLE_ENDIAN
148
opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);
149
#else
150
opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
151
#endif
152
opal = OF_finddevice("/ibm,opal");
153
154
platform_associativity = 4; /* Skiboot default. */
155
if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
156
sizeof(refpoints)) > 0) {
157
platform_associativity = refpoints[0];
158
}
159
160
if (cpu_idle_hook == NULL)
161
cpu_idle_hook = powernv_cpu_idle;
162
163
powernv_boot_pir = mfspr(SPR_PIR);
164
165
/* LPID must not be altered when PSL_DR or PSL_IR is set */
166
msr = mfmsr();
167
mtmsr(msr & ~(PSL_DR | PSL_IR));
168
169
/* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */
170
mtspr(SPR_LPID, 0);
171
isync();
172
173
if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
174
lpcr |= LPCR_HVICE;
175
176
#if BYTE_ORDER == LITTLE_ENDIAN
177
lpcr |= LPCR_ILE;
178
#endif
179
180
mtspr(SPR_LPCR, lpcr);
181
isync();
182
183
fscr = mfspr(SPR_HFSCR);
184
fscr |= FSCR_TAR | FSCR_EBB | HFSCR_BHRB | HFSCR_PM |
185
HFSCR_VECVSX | HFSCR_FP | FSCR_MSGP | FSCR_DSCR;
186
mtspr(SPR_HFSCR, fscr);
187
188
mtmsr(msr);
189
190
powernv_cpuref_init();
191
192
/* Set SLB count from device tree */
193
cpu = OF_peer(0);
194
cpu = OF_child(cpu);
195
while (cpu != 0) {
196
res = OF_getprop(cpu, "name", buf, sizeof(buf));
197
if (res > 0 && strcmp(buf, "cpus") == 0)
198
break;
199
cpu = OF_peer(cpu);
200
}
201
if (cpu == 0)
202
goto out;
203
204
cpu = OF_child(cpu);
205
while (cpu != 0) {
206
res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
207
if (res > 0 && strcmp(buf, "cpu") == 0)
208
break;
209
cpu = OF_peer(cpu);
210
}
211
if (cpu == 0)
212
goto out;
213
214
res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));
215
if (res > 0)
216
n_slbs = prop;
217
218
/*
219
* Scan the large page size property for PAPR compatible machines.
220
* See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
221
* for the encoding of the property.
222
*/
223
224
len = OF_getproplen(cpu, "ibm,segment-page-sizes");
225
if (len > 0) {
226
/*
227
* We have to use a variable length array on the stack
228
* since we have very limited stack space.
229
*/
230
pcell_t arr[len/sizeof(cell_t)];
231
res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,
232
sizeof(arr));
233
len /= 4;
234
idx = 0;
235
has_lp = false;
236
while (len > 0) {
237
shift = arr[idx];
238
slb_encoding = arr[idx + 1];
239
nptlp = arr[idx + 2];
240
idx += 3;
241
len -= 3;
242
while (len > 0 && nptlp) {
243
lp_size = arr[idx];
244
lp_encoding = arr[idx+1];
245
if (slb_encoding == SLBV_L && lp_encoding == 0)
246
has_lp = true;
247
248
if (slb_encoding == SLB_PGSZ_4K_4K &&
249
lp_encoding == LP_4K_16M)
250
moea64_has_lp_4k_16m = true;
251
252
idx += 2;
253
len -= 2;
254
nptlp--;
255
}
256
if (has_lp && moea64_has_lp_4k_16m)
257
break;
258
}
259
260
if (!has_lp)
261
panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
262
"not supported by this system.");
263
264
moea64_large_page_shift = shift;
265
moea64_large_page_size = 1ULL << lp_size;
266
}
267
268
out:
269
return (0);
270
}
271
272
/*
 * Report physical and available memory regions.  PowerNV has no
 * platform-specific carve-outs here: everything comes straight from the
 * flattened device tree via the common OFW helper.
 */
void
powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz)
{

	ofw_mem_regions(phys, physsz, avail, availsz);
}
279
280
/*
 * Report per-NUMA-domain memory regions, again delegating entirely to
 * the common OFW device-tree parser.
 */
static void
powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)
{

	ofw_numa_mem_regions(phys, physsz);
}
286
287
static u_long
288
powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
289
{
290
char buf[8];
291
phandle_t cpu, dev, root;
292
int res;
293
int32_t ticks = -1;
294
295
root = OF_peer(0);
296
dev = OF_child(root);
297
while (dev != 0) {
298
res = OF_getprop(dev, "name", buf, sizeof(buf));
299
if (res > 0 && strcmp(buf, "cpus") == 0)
300
break;
301
dev = OF_peer(dev);
302
}
303
304
for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
305
res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
306
if (res > 0 && strcmp(buf, "cpu") == 0)
307
break;
308
}
309
if (cpu == 0)
310
return (512000000);
311
312
OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));
313
314
if (ticks <= 0)
315
panic("Unable to determine timebase frequency!");
316
317
return (ticks);
318
319
}
320
321
static int
322
powernv_cpuref_init(void)
323
{
324
phandle_t cpu, dev;
325
char buf[32];
326
int a, res, tmp_cpuref_cnt;
327
static struct cpuref tmp_cpuref[MAXCPU];
328
cell_t interrupt_servers[32];
329
uint64_t bsp;
330
331
if (platform_cpuref_valid)
332
return (0);
333
334
dev = OF_peer(0);
335
dev = OF_child(dev);
336
while (dev != 0) {
337
res = OF_getprop(dev, "name", buf, sizeof(buf));
338
if (res > 0 && strcmp(buf, "cpus") == 0)
339
break;
340
dev = OF_peer(dev);
341
}
342
343
bsp = 0;
344
tmp_cpuref_cnt = 0;
345
for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
346
res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
347
if (res > 0 && strcmp(buf, "cpu") == 0) {
348
if (!ofw_bus_node_status_okay(cpu))
349
continue;
350
res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
351
if (res > 0) {
352
OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
353
interrupt_servers, res);
354
355
for (a = 0; a < res/sizeof(cell_t); a++) {
356
tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
357
tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
358
tmp_cpuref[tmp_cpuref_cnt].cr_domain =
359
powernv_node_numa_domain(NULL, cpu);
360
if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
361
bsp = tmp_cpuref_cnt;
362
363
tmp_cpuref_cnt++;
364
}
365
}
366
}
367
}
368
369
/* Map IDs, so BSP has CPUID 0 regardless of hwref */
370
for (a = bsp; a < tmp_cpuref_cnt; a++) {
371
platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
372
platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
373
platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
374
platform_cpuref_cnt++;
375
}
376
for (a = 0; a < bsp; a++) {
377
platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
378
platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
379
platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
380
platform_cpuref_cnt++;
381
}
382
383
platform_cpuref_valid = 1;
384
385
return (0);
386
}
387
388
static int
389
powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
390
{
391
if (platform_cpuref_valid == 0)
392
return (EINVAL);
393
394
cpuref->cr_cpuid = 0;
395
cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
396
cpuref->cr_domain = platform_cpuref[0].cr_domain;
397
398
return (0);
399
}
400
401
static int
402
powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
403
{
404
int id;
405
406
if (platform_cpuref_valid == 0)
407
return (EINVAL);
408
409
id = cpuref->cr_cpuid + 1;
410
if (id >= platform_cpuref_cnt)
411
return (ENOENT);
412
413
cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
414
cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
415
cpuref->cr_domain = platform_cpuref[id].cr_domain;
416
417
return (0);
418
}
419
420
static int
421
powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
422
{
423
424
cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
425
cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
426
cpuref->cr_domain = platform_cpuref[0].cr_domain;
427
return (0);
428
}
429
430
#ifdef SMP
431
static int
432
powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)
433
{
434
int result;
435
436
ap_pcpu = pc;
437
powerpc_sync();
438
439
result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);
440
if (result != OPAL_SUCCESS) {
441
printf("OPAL error (%d): unable to start AP %d\n",
442
result, (int)pc->pc_hwref);
443
return (ENXIO);
444
}
445
446
return (0);
447
}
448
449
static void
450
powernv_smp_probe_threads(platform_t plat)
451
{
452
char buf[8];
453
phandle_t cpu, dev, root;
454
int res, nthreads;
455
456
root = OF_peer(0);
457
458
dev = OF_child(root);
459
while (dev != 0) {
460
res = OF_getprop(dev, "name", buf, sizeof(buf));
461
if (res > 0 && strcmp(buf, "cpus") == 0)
462
break;
463
dev = OF_peer(dev);
464
}
465
466
nthreads = 1;
467
for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
468
res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
469
if (res <= 0 || strcmp(buf, "cpu") != 0)
470
continue;
471
472
res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
473
474
if (res >= 0)
475
nthreads = res / sizeof(cell_t);
476
else
477
nthreads = 1;
478
break;
479
}
480
481
smp_threads_per_core = nthreads;
482
if (mp_ncpus % nthreads == 0)
483
mp_ncores = mp_ncpus / nthreads;
484
}
485
486
static struct cpu_group *
487
cpu_group_init(struct cpu_group *group, struct cpu_group *parent,
488
const cpuset_t *cpus, int children, int level, int flags)
489
{
490
struct cpu_group *child;
491
492
child = children != 0 ? smp_topo_alloc(children) : NULL;
493
494
group->cg_parent = parent;
495
group->cg_child = child;
496
CPU_COPY(cpus, &group->cg_mask);
497
group->cg_count = CPU_COUNT(cpus);
498
group->cg_children = children;
499
group->cg_level = level;
500
group->cg_flags = flags;
501
502
return (child);
503
}
504
505
/*
 * Build the scheduler topology: root -> NUMA domain (CG_SHARE_NONE) ->
 * core (CG_SHARE_L3) -> SMT siblings (CG_SHARE_L1 | CG_FLAG_SMT).
 * Falls back to a flat topology when mp_ncpus is not a whole number of
 * smp_threads_per_core.
 */
static struct cpu_group *
powernv_smp_topo(platform_t plat)
{
	struct cpu_group *core, *dom, *root;
	cpuset_t corecpus, domcpus;
	int cpuid, i, j, k, ncores;

	if (mp_ncpus % smp_threads_per_core != 0) {
		printf("%s: irregular SMP topology (%d threads, %d per core)\n",
		    __func__, mp_ncpus, smp_threads_per_core);
		return (smp_topo_none());
	}

	/* cpu_group_init() returns the child array of the node it fills. */
	root = smp_topo_alloc(1);
	dom = cpu_group_init(root, NULL, &all_cpus, vm_ndomains, CG_SHARE_NONE,
	    0);

	/*
	 * Redundant layers will be collapsed by the caller so we don't need a
	 * special case for a single domain.
	 */
	for (i = 0; i < vm_ndomains; i++, dom++) {
		/* domcpus is consumed destructively by the CPU_FFS loop. */
		CPU_COPY(&cpuset_domain[i], &domcpus);
		ncores = CPU_COUNT(&domcpus) / smp_threads_per_core;
		KASSERT(CPU_COUNT(&domcpus) % smp_threads_per_core == 0,
		    ("%s: domain %d core count not divisible by thread count",
		    __func__, i));

		core = cpu_group_init(dom, root, &domcpus, ncores, CG_SHARE_L3,
		    0);
		for (j = 0; j < ncores; j++, core++) {
			/*
			 * Assume that consecutive CPU IDs correspond to sibling
			 * threads.
			 */
			CPU_ZERO(&corecpus);
			/* Peel the lowest-numbered CPUs off domcpus. */
			for (k = 0; k < smp_threads_per_core; k++) {
				cpuid = CPU_FFS(&domcpus) - 1;
				CPU_CLR(cpuid, &domcpus);
				CPU_SET(cpuid, &corecpus);
			}
			(void)cpu_group_init(core, dom, &corecpus, 0,
			    CG_SHARE_L1, CG_FLAG_SMT);
		}
	}

	return (root);
}
553
554
#endif
555
556
/*
 * Reboot the machine by asking OPAL firmware for a full chassis reboot.
 */
static void
powernv_reset(platform_t platform)
{

	opal_call(OPAL_CEC_REBOOT);
}
562
563
/*
 * Per-AP initialization hook: run any extra setup another subsystem has
 * registered via powernv_smp_ap_extra_init.
 */
static void
powernv_smp_ap_init(platform_t platform)
{

	if (powernv_smp_ap_extra_init != NULL)
		powernv_smp_ap_extra_init();
}
570
571
/*
 * Default idle hook installed by powernv_attach() when no other idle
 * method is configured: deliberately a no-op (simple spin).
 */
static void
powernv_cpu_idle(sbintime_t sbt)
{
}
575
576
/*
 * Map a device-tree node to a small NUMA domain index.  The significant
 * cell of the node's "ibm,associativity" property (selected by
 * platform_associativity) is interned into numa_domains[]; its slot
 * number is the returned domain.  Walks up to the parent node when the
 * property is missing or too short.  Returns 0 for non-NUMA kernels,
 * when NUMA is disabled by tunable, or when the table is exhausted.
 */
static int
powernv_node_numa_domain(platform_t platform, phandle_t node)
{
	/* XXX: Is locking necessary in here? */
	static int numa_domains[MAXMEMDOM];	/* interned associativity values */
	static int numa_max_domain;		/* number of slots used above */
	cell_t associativity[5];
	int i, res;

#ifndef NUMA
	/* Everything lands in domain 0 on non-NUMA kernels. */
	return (0);
#endif
	i = 0;
	TUNABLE_INT_FETCH("vm.numa.disabled", &i);
	if (i)
		return (0);

	/*
	 * NOTE(review): associativity[] has 5 cells; platform_associativity
	 * comes from firmware and is only safe as an index if it is <= 4 --
	 * confirm skiboot never reports a larger reference point.
	 */
	res = OF_getencprop(node, "ibm,associativity",
	    associativity, sizeof(associativity));

	/*
	 * If this node doesn't have associativity, or if there are not
	 * enough elements in it, check its parent.
	 */
	if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {
		node = OF_parent(node);
		/* If already at the root, use default domain. */
		if (node == 0)
			return (0);
		return (powernv_node_numa_domain(platform, node));
	}

	/* Already interned?  Return the existing slot. */
	for (i = 0; i < numa_max_domain; i++) {
		if (numa_domains[i] == associativity[platform_associativity])
			return (i);
	}
	/* New value: intern it, or fall back to domain 0 if table is full. */
	if (i < MAXMEMDOM)
		numa_domains[numa_max_domain++] =
		    associativity[platform_associativity];
	else
		i = 0;

	return (i);
}
620
621
/* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
static void
powernv_setup_nmmu(void *unused)
{
	if (opal_check() != 0)
		return;
	/* Share the host partition table (PTCR) with the Nest MMU. */
	opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));
}

SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);
631
632