// SPDX-License-Identifier: LGPL-2.0+
/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000  Richard Gooch
    Copyright (c) 2002	     Patrick Mochel

    Richard Gooch may be reached by email at [email protected]
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <[email protected]>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
*/

#include <linux/types.h> /* FIXME: kvm_para.h needs this */

#include <linux/stop_machine.h>
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>

#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/memtype.h>

#include "mtrr.h"

static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE);
static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB);
static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH);
static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT);
static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK);

/* arch_phys_wc_add returns an MTRR register index plus this offset. */
#define MTRR_TO_PHYS_WC_OFFSET 1000

u32 num_var_ranges;

unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
DEFINE_MUTEX(mtrr_mutex);

const struct mtrr_ops *mtrr_if;

/* Returns non-zero if we have the write-combining memory type */
static int have_wrcomb(void)
{
	struct pci_dev *dev;

	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
	if (dev != NULL) {
		/*
		 * ServerWorks LE chipsets < rev 6 have problems with
		 * write-combining. Don't allow it and leave room for other
		 * chipsets to be tagged.
		 */
		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
		    dev->revision <= 5) {
			pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		/*
		 * Intel 450NX errata #23. Non-ascending cacheline evictions
		 * to write-combining memory may result in data corruption.
		 */
		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
			pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		pci_dev_put(dev);
	}
	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}

static void __init init_table(void)
{
	int i, max;

	max = num_var_ranges;
	for (i = 0; i < max; i++)
		mtrr_usage_table[i] = 1;
}

struct set_mtrr_data {
	unsigned long	smp_base;
	unsigned long	smp_size;
	unsigned int	smp_reg;
	mtrr_type	smp_type;
};
/**
 * mtrr_rendezvous_handler - Work done in the synchronization handler.
 *			     Executed by all the CPUs.
 * @info: pointer to mtrr configuration data
 *
 * Returns nothing.
 */
static int mtrr_rendezvous_handler(void *info)
{
	struct set_mtrr_data *data = info;

	mtrr_if->set(data->smp_reg, data->smp_base,
		     data->smp_size, data->smp_type);
	return 0;
}

static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
	return type1 == MTRR_TYPE_UNCACHABLE ||
	       type2 == MTRR_TYPE_UNCACHABLE ||
	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
}

/**
 * set_mtrr - update mtrrs on all processors
 * @reg:	mtrr in question
 * @base:	mtrr base
 * @size:	mtrr size
 * @type:	mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 *
 * 1. Queue work to do the following on all processors:
 * 2. Disable interrupts
 * 3. Wait for all processors to do so
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear the PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and re-enable caching
 * 13. Set the PGE bit
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
 * What does that mean for us? Well, stop_machine() will ensure that
 * the rendezvous handler is started on each CPU. And in lockstep they
 * do the state transition of disabling interrupts, updating MTRRs
 * (the CPU vendors may each do it differently, so we call the
 * mtrr_if->set() callback and let them take care of it), and enabling
 * interrupts.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size,
		     mtrr_type type)
{
	struct set_mtrr_data data = { .smp_reg = reg,
				      .smp_base = base,
				      .smp_size = size,
				      .smp_type = type
				    };

	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);

	generic_rebuild_map();
}
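/*
 * An illustrative trace of how a single update flows through the helpers
 * above (a descriptive sketch of this file's call paths, not an
 * authoritative call graph):
 *
 *	mtrr_add_page()				// entry point, see below
 *	    set_mtrr()
 *		stop_machine_cpuslocked()	// rendezvous all online CPUs
 *		    mtrr_rendezvous_handler()	// runs on every CPU
 *			mtrr_if->set()		// vendor-specific MSR writes
 *		generic_rebuild_map()		// refresh the cached MTRR map
 *
 * stop_machine_cpuslocked() runs the handler on all CPUs with interrupts
 * disabled, which is what makes steps 2-15 of the procedure above safe.
 */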
/**
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (in units of 4 kB!)
 * @size: Physical size of region in pages (4 kB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added, either because all regions are in use
 * or the CPU cannot support it, a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add_page(unsigned long base, unsigned long size,
		  unsigned int type, bool increment)
{
	unsigned long lbase, lsize;
	int i, replace, error;
	mtrr_type ltype;

	if (!mtrr_enabled())
		return -ENXIO;

	error = mtrr_if->validate_add_page(base, size, type);
	if (error)
		return error;

	if (type >= MTRR_NUM_TYPES) {
		pr_warn("type: %u invalid\n", type);
		return -EINVAL;
	}

	/* If the type is WC, check that this processor supports it */
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
		pr_warn("your processor doesn't support write-combining\n");
		return -ENOSYS;
	}

	if (!size) {
		pr_warn("zero sized request\n");
		return -EINVAL;
	}

	if ((base | (base + size - 1)) >>
	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
		pr_warn("base or size exceeds the MTRR width\n");
		return -EINVAL;
	}

	error = -EINVAL;
	replace = -1;

	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();

	/* Search for existing MTRR */
	mutex_lock(&mtrr_mutex);
	for (i = 0; i < num_var_ranges; ++i) {
		mtrr_if->get(i, &lbase, &lsize, &ltype);
		if (!lsize || base > lbase + lsize - 1 ||
		    base + size - 1 < lbase)
			continue;
		/*
		 * At this point we know there is some kind of
		 * overlap/enclosure
		 */
		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
			if (base <= lbase &&
			    base + size - 1 >= lbase + lsize - 1) {
				/* New region encloses an existing region */
				if (type == ltype) {
					replace = replace == -1 ? i : -2;
					continue;
				} else if (types_compatible(type, ltype))
					continue;
			}
			pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n",
				base, size, lbase, lsize);
			goto out;
		}
		/* New region is enclosed by an existing region */
		if (ltype != type) {
			if (types_compatible(type, ltype))
				continue;
			pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
				base, size, mtrr_attrib_to_str(ltype),
				mtrr_attrib_to_str(type));
			goto out;
		}
		if (increment)
			++mtrr_usage_table[i];
		error = i;
		goto out;
	}
	/* Search for an empty MTRR */
	i = mtrr_if->get_free_region(base, size, replace);
	if (i >= 0) {
		set_mtrr(i, base, size, type);
		if (likely(replace < 0)) {
			mtrr_usage_table[i] = 1;
		} else {
			mtrr_usage_table[i] = mtrr_usage_table[replace];
			if (increment)
				mtrr_usage_table[i]++;
			if (unlikely(replace != i)) {
				set_mtrr(replace, 0, 0, 0);
				mtrr_usage_table[replace] = 0;
			}
		}
	} else {
		pr_info("no more MTRRs available\n");
	}
	error = i;
out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}
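/*
 * Example usage of the page-granular API (an illustrative sketch only;
 * the region below is hypothetical). The return value is a register
 * index, but callers must treat it as an opaque cookie:
 *
 *	int reg;
 *
 *	// 4 MiB write-combining region at physical 0xf8000000,
 *	// expressed in 4 KiB pages:
 *	reg = mtrr_add_page(0xf8000, 0x400, MTRR_TYPE_WRCOMB, true);
 *	if (reg < 0)
 *		return reg;	// all registers busy, or WC unsupported
 *
 *	// ... later, drop the usage count (base/size are ignored when
 *	// the register cookie is supplied):
 *	mtrr_del_page(reg, 0, 0);
 */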
static int mtrr_check(unsigned long base, unsigned long size)
{
	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
		pr_warn("size and base must be multiples of 4 kiB\n");
		Dprintk("size: 0x%lx base: 0x%lx\n", size, base);
		dump_stack();
		return -1;
	}
	return 0;
}

/**
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added, either because all regions are in use
 * or the CPU cannot support it, a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
	     bool increment)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
			     increment);
}
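/*
 * mtrr_add() is the byte-granular wrapper around mtrr_add_page(): it
 * checks 4 KiB alignment and shifts to page units. A minimal sketch
 * (the base address is hypothetical):
 *
 *	// 16 MiB of write-combining at 0xe0000000 ...
 *	int reg = mtrr_add(0xe0000000UL, 0x1000000UL, MTRR_TYPE_WRCOMB, true);
 *	// ... is equivalent to:
 *	// mtrr_add_page(0xe0000, 0x1000, MTRR_TYPE_WRCOMB, true);
 */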
/**
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code is returned.
 */
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
	int i, max;
	mtrr_type ltype;
	unsigned long lbase, lsize;
	int error = -EINVAL;

	if (!mtrr_enabled())
		return -ENODEV;

	max = num_var_ranges;
	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();
	mutex_lock(&mtrr_mutex);
	if (reg < 0) {
		/* Search for existing MTRR */
		for (i = 0; i < max; ++i) {
			mtrr_if->get(i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
			Dprintk("no MTRR for %lx000,%lx000 found\n", base, size);
			goto out;
		}
	}
	if (reg >= max) {
		pr_warn("register: %d too big\n", reg);
		goto out;
	}
	mtrr_if->get(reg, &lbase, &lsize, &ltype);
	if (lsize < 1) {
		pr_warn("MTRR %d not used\n", reg);
		goto out;
	}
	if (mtrr_usage_table[reg] < 1) {
		pr_warn("reg: %d has count=0\n", reg);
		goto out;
	}
	if (--mtrr_usage_table[reg] < 1)
		set_mtrr(reg, 0, 0, 0);
	error = reg;
out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}

/**
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code is returned.
 */
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}

/**
 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
 * @base: Physical base address
 * @size: Size of region
 *
 * If PAT is available, this does nothing. If PAT is unavailable, it
 * attempts to add a WC MTRR covering size bytes starting at base and
 * logs an error if this fails.
 *
 * The caller should provide a power of two size on an equivalent
 * power of two boundary.
 *
 * Drivers must store the return value to pass to arch_phys_wc_del(),
 * but drivers should not try to interpret that return value.
 */
int arch_phys_wc_add(unsigned long base, unsigned long size)
{
	int ret;

	if (pat_enabled() || !mtrr_enabled())
		return 0;  /* Success! (We don't need to do anything.) */

	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
	if (ret < 0) {
		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
			(void *)base, (void *)(base + size - 1));
		return ret;
	}
	return ret + MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL(arch_phys_wc_add);
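/*
 * Typical driver pattern (a sketch; the PCI BAR below is hypothetical):
 *
 *	int wc_cookie;
 *
 *	wc_cookie = arch_phys_wc_add(pci_resource_start(pdev, 1),
 *				     pci_resource_len(pdev, 1));
 *	// wc_cookie is 0 when PAT made the MTRR unnecessary and a
 *	// negative error code on failure; both are safe to hand
 *	// straight back to arch_phys_wc_del(), below:
 *	arch_phys_wc_del(wc_cookie);
 */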
/*
 * arch_phys_wc_del - undoes arch_phys_wc_add
 * @handle: Return value from arch_phys_wc_add
 *
 * This cleans up after arch_phys_wc_add.
 *
 * The API guarantees that arch_phys_wc_del(error code) and
 * arch_phys_wc_del(0) do nothing.
 */
void arch_phys_wc_del(int handle)
{
	if (handle >= 1) {
		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
	}
}
EXPORT_SYMBOL(arch_phys_wc_del);

/*
 * arch_phys_wc_index - translates arch_phys_wc_add's return value
 * @handle: Return value from arch_phys_wc_add
 *
 * This will turn the return value from arch_phys_wc_add into an mtrr
 * index suitable for debugging.
 *
 * Note: There is no legitimate use for this function, except possibly
 * in a printk line. Alas there is an illegitimate use in some ancient
 * drm ioctls.
 */
int arch_phys_wc_index(int handle)
{
	if (handle < MTRR_TO_PHYS_WC_OFFSET)
		return -1;
	else
		return handle - MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL_GPL(arch_phys_wc_index);

int __initdata changed_by_mtrr_cleanup;

/**
 * mtrr_bp_init - initialize MTRRs on the boot CPU
 *
 * This needs to be called early; before any of the other CPUs are
 * initialized (i.e. before smp_init()).
 */
void __init mtrr_bp_init(void)
{
	bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR);
	const char *why = "(not available)";
	unsigned long config, dummy;

	phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);

	if (!generic_mtrrs && mtrr_state.enabled) {
		/*
		 * Software overwrite of MTRR state, only for generic case.
		 * Note that X86_FEATURE_MTRR has been reset in this case.
		 */
		init_table();
		mtrr_build_map();
		pr_info("MTRRs set to read-only\n");

		return;
	}

	if (generic_mtrrs)
		mtrr_if = &generic_mtrr_ops;
	else
		mtrr_set_if();

	if (mtrr_enabled()) {
		/* Get the number of variable MTRR ranges. */
		if (mtrr_if == &generic_mtrr_ops)
			rdmsr(MSR_MTRRcap, config, dummy);
		else
			config = mtrr_if->var_regs;
		num_var_ranges = config & MTRR_CAP_VCNT;

		init_table();
		if (mtrr_if == &generic_mtrr_ops) {
			/* BIOS may override */
			if (get_mtrr_state()) {
				memory_caching_control |= CACHE_MTRR;
				changed_by_mtrr_cleanup = mtrr_cleanup();
				mtrr_build_map();
			} else {
				mtrr_if = NULL;
				why = "by BIOS";
			}
		}
	}

	if (!mtrr_enabled())
		pr_info("MTRRs disabled %s\n", why);
}

/**
 * mtrr_save_state - Save current fixed-range MTRR state of the first
 *	cpu in cpu_online_mask.
 */
void mtrr_save_state(void)
{
	int first_cpu;

	if (!mtrr_enabled() || !mtrr_state.have_fixed)
		return;

	first_cpu = cpumask_first(cpu_online_mask);
	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
}

static int __init mtrr_init_finalize(void)
{
	/*
	 * Map might exist if guest_force_mtrr_state() has been called or if
	 * mtrr_enabled() returns true.
	 */
	mtrr_copy_map();

	if (!mtrr_enabled())
		return 0;

	if (memory_caching_control & CACHE_MTRR) {
		if (!changed_by_mtrr_cleanup)
			mtrr_state_warn();
		return 0;
	}

	mtrr_register_syscore();

	return 0;
}
subsys_initcall(mtrr_init_finalize);
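/*
 * Initialization ordering, summarized from the code above (descriptive
 * only, not an authoritative boot-sequence specification):
 *
 *	mtrr_bp_init()		- early boot, boot CPU only, before
 *				  smp_init(); selects mtrr_if and reads
 *				  the variable-range count
 *	mtrr_save_state()	- before an AP is brought up, so the AP
 *				  can copy the fixed ranges of an online
 *				  CPU
 *	mtrr_init_finalize()	- subsys_initcall; copies the MTRR map and
 *				  registers syscore ops when the generic
 *				  cache control doesn't handle MTRRs
 */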