Path: blob/main/sys/powerpc/powernv/platform_powernv.c
39507 views
/*-1* Copyright (c) 2015 Nathan Whitehorn2* Copyright (c) 2017-2018 Semihalf3* All rights reserved.4*5* Redistribution and use in source and binary forms, with or without6* modification, are permitted provided that the following conditions7* are met:8*9* 1. Redistributions of source code must retain the above copyright10* notice, this list of conditions and the following disclaimer.11* 2. Redistributions in binary form must reproduce the above copyright12* notice, this list of conditions and the following disclaimer in the13* documentation and/or other materials provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR16* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES17* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.18* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,19* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT20* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,21* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY22* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT23* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF24* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.25*/2627#include <sys/param.h>28#include <sys/systm.h>29#include <sys/kernel.h>30#include <sys/bus.h>31#include <sys/pcpu.h>32#include <sys/proc.h>33#include <sys/smp.h>34#include <vm/vm.h>35#include <vm/pmap.h>3637#include <machine/bus.h>38#include <machine/cpu.h>39#include <machine/hid.h>40#include <machine/platformvar.h>41#include <machine/pmap.h>42#include <machine/rtas.h>43#include <machine/smp.h>44#include <machine/spr.h>45#include <machine/trap.h>4647#include <dev/ofw/openfirm.h>48#include <dev/ofw/ofw_bus.h>49#include <dev/ofw/ofw_bus_subr.h>50#include <machine/ofw_machdep.h>51#include <powerpc/aim/mmu_oea64.h>5253#include "platform_if.h"54#include "opal.h"5556#ifdef SMP57extern void *ap_pcpu;58#endif5960void (*powernv_smp_ap_extra_init)(void);6162static int powernv_probe(platform_t);63static int powernv_attach(platform_t);64void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,65struct mem_region *avail, int *availsz);66static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);67static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);68static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);69static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);70static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);71static void powernv_smp_ap_init(platform_t);72#ifdef SMP73static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);74static void powernv_smp_probe_threads(platform_t);75static struct cpu_group *powernv_smp_topo(platform_t plat);76#endif77static void powernv_reset(platform_t);78static void powernv_cpu_idle(sbintime_t sbt);79static int powernv_cpuref_init(void);80static int powernv_node_numa_domain(platform_t platform, phandle_t node);8182static platform_method_t powernv_methods[] = {83PLATFORMMETHOD(platform_probe, powernv_probe),84PLATFORMMETHOD(platform_attach, powernv_attach),85PLATFORMMETHOD(platform_mem_regions, powernv_mem_regions),86PLATFORMMETHOD(platform_numa_mem_regions, powernv_numa_mem_regions),87PLATFORMMETHOD(platform_timebase_freq, powernv_timebase_freq),8889PLATFORMMETHOD(platform_smp_ap_init, powernv_smp_ap_init),90PLATFORMMETHOD(platform_smp_first_cpu, powernv_smp_first_cpu),91PLATFORMMETHOD(platform_smp_next_cpu, powernv_smp_next_cpu),92PLATFORMMETHOD(platform_smp_get_bsp, powernv_smp_get_bsp),93#ifdef SMP94PLATFORMMETHOD(platform_smp_start_cpu, powernv_smp_start_cpu),95PLATFORMMETHOD(platform_smp_probe_threads, powernv_smp_probe_threads),96PLATFORMMETHOD(platform_smp_topo, powernv_smp_topo),97#endif98PLATFORMMETHOD(platform_node_numa_domain, powernv_node_numa_domain),99100PLATFORMMETHOD(platform_reset, powernv_reset),101{ 0, 0 }102};103104static platform_def_t powernv_platform = {105"powernv",106powernv_methods,1070108};109110static struct cpuref platform_cpuref[MAXCPU];111static int platform_cpuref_cnt;112static int platform_cpuref_valid;113static int platform_associativity;114115PLATFORM_DEF(powernv_platform);116117static uint64_t powernv_boot_pir;118119static int120powernv_probe(platform_t plat)121{122if (opal_check() == 0)123return (BUS_PROBE_SPECIFIC);124125return (ENXIO);126}127128static int129powernv_attach(platform_t plat)130{131uint32_t nptlp, shift = 0, slb_encoding = 0;132int32_t lp_size, lp_encoding;133char buf[255];134pcell_t refpoints[3];135pcell_t prop;136phandle_t cpu;137phandle_t opal;138int res, len, idx;139register_t msr;140register_t fscr;141bool has_lp;142143/* Ping OPAL again just to make sure */144opal_check();145146#if BYTE_ORDER == LITTLE_ENDIAN147opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);148#else149opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);150#endif151opal = OF_finddevice("/ibm,opal");152153platform_associativity = 4; /* Skiboot default. */154if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,155sizeof(refpoints)) > 0) {156platform_associativity = refpoints[0];157}158159if (cpu_idle_hook == NULL)160cpu_idle_hook = powernv_cpu_idle;161162powernv_boot_pir = mfspr(SPR_PIR);163164/* LPID must not be altered when PSL_DR or PSL_IR is set */165msr = mfmsr();166mtmsr(msr & ~(PSL_DR | PSL_IR));167168/* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */169mtspr(SPR_LPID, 0);170isync();171172if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)173lpcr |= LPCR_HVICE;174175#if BYTE_ORDER == LITTLE_ENDIAN176lpcr |= LPCR_ILE;177#endif178179mtspr(SPR_LPCR, lpcr);180isync();181182fscr = mfspr(SPR_HFSCR);183fscr |= FSCR_TAR | FSCR_EBB | HFSCR_BHRB | HFSCR_PM |184HFSCR_VECVSX | HFSCR_FP | FSCR_MSGP | FSCR_DSCR;185mtspr(SPR_HFSCR, fscr);186187mtmsr(msr);188189powernv_cpuref_init();190191/* Set SLB count from device tree */192cpu = OF_peer(0);193cpu = OF_child(cpu);194while (cpu != 0) {195res = OF_getprop(cpu, "name", buf, sizeof(buf));196if (res > 0 && strcmp(buf, "cpus") == 0)197break;198cpu = OF_peer(cpu);199}200if (cpu == 0)201goto out;202203cpu = OF_child(cpu);204while (cpu != 0) {205res = OF_getprop(cpu, "device_type", buf, sizeof(buf));206if (res > 0 && strcmp(buf, "cpu") == 0)207break;208cpu = OF_peer(cpu);209}210if (cpu == 0)211goto out;212213res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));214if (res > 0)215n_slbs = prop;216217/*218* Scan the large page size property for PAPR compatible machines.219* See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'220* for the encoding of the property.221*/222223len = OF_getproplen(cpu, "ibm,segment-page-sizes");224if (len > 0) {225/*226* We have to use a variable length array on the stack227* since we have very limited stack space.228*/229pcell_t arr[len/sizeof(cell_t)];230res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,231sizeof(arr));232len /= 4;233idx = 0;234has_lp = false;235while (len > 0) {236shift = arr[idx];237slb_encoding = arr[idx + 1];238nptlp = arr[idx + 2];239idx += 3;240len -= 3;241while (len > 0 && nptlp) {242lp_size = arr[idx];243lp_encoding = arr[idx+1];244if (slb_encoding == SLBV_L && lp_encoding == 0)245has_lp = true;246247if (slb_encoding == SLB_PGSZ_4K_4K &&248lp_encoding == LP_4K_16M)249moea64_has_lp_4k_16m = true;250251idx += 2;252len -= 2;253nptlp--;254}255if (has_lp && moea64_has_lp_4k_16m)256break;257}258259if (!has_lp)260panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "261"not supported by this system.");262263moea64_large_page_shift = shift;264moea64_large_page_size = 1ULL << lp_size;265}266267out:268return (0);269}270271void272powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,273struct mem_region *avail, int *availsz)274{275276ofw_mem_regions(phys, physsz, avail, availsz);277}278279static void280powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)281{282283ofw_numa_mem_regions(phys, physsz);284}285286static u_long287powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)288{289char buf[8];290phandle_t cpu, dev, root;291int res;292int32_t ticks = -1;293294root = OF_peer(0);295dev = OF_child(root);296while (dev != 0) {297res = OF_getprop(dev, "name", buf, sizeof(buf));298if (res > 0 && strcmp(buf, "cpus") == 0)299break;300dev = OF_peer(dev);301}302303for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {304res = OF_getprop(cpu, "device_type", buf, sizeof(buf));305if (res > 0 && strcmp(buf, "cpu") == 0)306break;307}308if (cpu == 0)309return (512000000);310311OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));312313if (ticks <= 0)314panic("Unable to determine timebase frequency!");315316return (ticks);317318}319320static int321powernv_cpuref_init(void)322{323phandle_t cpu, dev;324char buf[32];325int a, res, tmp_cpuref_cnt;326static struct cpuref tmp_cpuref[MAXCPU];327cell_t interrupt_servers[32];328uint64_t bsp;329330if (platform_cpuref_valid)331return (0);332333dev = OF_peer(0);334dev = OF_child(dev);335while (dev != 0) {336res = OF_getprop(dev, "name", buf, sizeof(buf));337if (res > 0 && strcmp(buf, "cpus") == 0)338break;339dev = OF_peer(dev);340}341342bsp = 0;343tmp_cpuref_cnt = 0;344for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {345res = OF_getprop(cpu, "device_type", buf, sizeof(buf));346if (res > 0 && strcmp(buf, "cpu") == 0) {347if (!ofw_bus_node_status_okay(cpu))348continue;349res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");350if (res > 0) {351OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",352interrupt_servers, res);353354for (a = 0; a < res/sizeof(cell_t); a++) {355tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];356tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;357tmp_cpuref[tmp_cpuref_cnt].cr_domain =358powernv_node_numa_domain(NULL, cpu);359if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)360bsp = tmp_cpuref_cnt;361362tmp_cpuref_cnt++;363}364}365}366}367368/* Map IDs, so BSP has CPUID 0 regardless of hwref */369for (a = bsp; a < tmp_cpuref_cnt; a++) {370platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;371platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;372platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;373platform_cpuref_cnt++;374}375for (a = 0; a < bsp; a++) {376platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;377platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;378platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;379platform_cpuref_cnt++;380}381382platform_cpuref_valid = 1;383384return (0);385}386387static int388powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)389{390if (platform_cpuref_valid == 0)391return (EINVAL);392393cpuref->cr_cpuid = 0;394cpuref->cr_hwref = platform_cpuref[0].cr_hwref;395cpuref->cr_domain = platform_cpuref[0].cr_domain;396397return (0);398}399400static int401powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)402{403int id;404405if (platform_cpuref_valid == 0)406return (EINVAL);407408id = cpuref->cr_cpuid + 1;409if (id >= platform_cpuref_cnt)410return (ENOENT);411412cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;413cpuref->cr_hwref = platform_cpuref[id].cr_hwref;414cpuref->cr_domain = platform_cpuref[id].cr_domain;415416return (0);417}418419static int420powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)421{422423cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;424cpuref->cr_hwref = platform_cpuref[0].cr_hwref;425cpuref->cr_domain = platform_cpuref[0].cr_domain;426return (0);427}428429#ifdef SMP430static int431powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)432{433int result;434435ap_pcpu = pc;436powerpc_sync();437438result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);439if (result != OPAL_SUCCESS) {440printf("OPAL error (%d): unable to start AP %d\n",441result, (int)pc->pc_hwref);442return (ENXIO);443}444445return (0);446}447448static void449powernv_smp_probe_threads(platform_t plat)450{451char buf[8];452phandle_t cpu, dev, root;453int res, nthreads;454455root = OF_peer(0);456457dev = OF_child(root);458while (dev != 0) {459res = OF_getprop(dev, "name", buf, sizeof(buf));460if (res > 0 && strcmp(buf, "cpus") == 0)461break;462dev = OF_peer(dev);463}464465nthreads = 1;466for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {467res = OF_getprop(cpu, "device_type", buf, sizeof(buf));468if (res <= 0 || strcmp(buf, "cpu") != 0)469continue;470471res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");472473if (res >= 0)474nthreads = res / sizeof(cell_t);475else476nthreads = 1;477break;478}479480smp_threads_per_core = nthreads;481if (mp_ncpus % nthreads == 0)482mp_ncores = mp_ncpus / nthreads;483}484485static struct cpu_group *486cpu_group_init(struct cpu_group *group, struct cpu_group *parent,487const cpuset_t *cpus, int children, int level, int flags)488{489struct cpu_group *child;490491child = children != 0 ? smp_topo_alloc(children) : NULL;492493group->cg_parent = parent;494group->cg_child = child;495CPU_COPY(cpus, &group->cg_mask);496group->cg_count = CPU_COUNT(cpus);497group->cg_children = children;498group->cg_level = level;499group->cg_flags = flags;500501return (child);502}503504static struct cpu_group *505powernv_smp_topo(platform_t plat)506{507struct cpu_group *core, *dom, *root;508cpuset_t corecpus, domcpus;509int cpuid, i, j, k, ncores;510511if (mp_ncpus % smp_threads_per_core != 0) {512printf("%s: irregular SMP topology (%d threads, %d per core)\n",513__func__, mp_ncpus, smp_threads_per_core);514return (smp_topo_none());515}516517root = smp_topo_alloc(1);518dom = cpu_group_init(root, NULL, &all_cpus, vm_ndomains, CG_SHARE_NONE,5190);520521/*522* Redundant layers will be collapsed by the caller so we don't need a523* special case for a single domain.524*/525for (i = 0; i < vm_ndomains; i++, dom++) {526CPU_COPY(&cpuset_domain[i], &domcpus);527ncores = CPU_COUNT(&domcpus) / smp_threads_per_core;528KASSERT(CPU_COUNT(&domcpus) % smp_threads_per_core == 0,529("%s: domain %d core count not divisible by thread count",530__func__, i));531532core = cpu_group_init(dom, root, &domcpus, ncores, CG_SHARE_L3,5330);534for (j = 0; j < ncores; j++, core++) {535/*536* Assume that consecutive CPU IDs correspond to sibling537* threads.538*/539CPU_ZERO(&corecpus);540for (k = 0; k < smp_threads_per_core; k++) {541cpuid = CPU_FFS(&domcpus) - 1;542CPU_CLR(cpuid, &domcpus);543CPU_SET(cpuid, &corecpus);544}545(void)cpu_group_init(core, dom, &corecpus, 0,546CG_SHARE_L1, CG_FLAG_SMT);547}548}549550return (root);551}552553#endif554555static void556powernv_reset(platform_t platform)557{558559opal_call(OPAL_CEC_REBOOT);560}561562static void563powernv_smp_ap_init(platform_t platform)564{565566if (powernv_smp_ap_extra_init != NULL)567powernv_smp_ap_extra_init();568}569570static void571powernv_cpu_idle(sbintime_t sbt)572{573}574575static int576powernv_node_numa_domain(platform_t platform, phandle_t node)577{578/* XXX: Is locking necessary in here? */579static int numa_domains[MAXMEMDOM];580static int numa_max_domain;581cell_t associativity[5];582int i, res;583584#ifndef NUMA585return (0);586#endif587i = 0;588TUNABLE_INT_FETCH("vm.numa.disabled", &i);589if (i)590return (0);591592res = OF_getencprop(node, "ibm,associativity",593associativity, sizeof(associativity));594595/*596* If this node doesn't have associativity, or if there are not597* enough elements in it, check its parent.598*/599if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {600node = OF_parent(node);601/* If already at the root, use default domain. */602if (node == 0)603return (0);604return (powernv_node_numa_domain(platform, node));605}606607for (i = 0; i < numa_max_domain; i++) {608if (numa_domains[i] == associativity[platform_associativity])609return (i);610}611if (i < MAXMEMDOM)612numa_domains[numa_max_domain++] =613associativity[platform_associativity];614else615i = 0;616617return (i);618}619620/* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */621static void622powernv_setup_nmmu(void *unused)623{624if (opal_check() != 0)625return;626opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));627}628629SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);630631632