// SPDX-License-Identifier: GPL-2.0-only1/*2* powerpc code to implement the kexec_file_load syscall3*4* Copyright (C) 2004 Adam Litke ([email protected])5* Copyright (C) 2004 IBM Corp.6* Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation7* Copyright (C) 2005 R Sharada ([email protected])8* Copyright (C) 2006 Mohan Kumar M ([email protected])9* Copyright (C) 2020 IBM Corporation10*11* Based on kexec-tools' kexec-ppc64.c, fs2dt.c.12* Heavily modified for the kernel by13* Hari Bathini, IBM Corporation.14*/1516#define pr_fmt(fmt) "kexec ranges: " fmt1718#include <linux/sort.h>19#include <linux/kexec.h>20#include <linux/of.h>21#include <linux/slab.h>22#include <linux/memblock.h>23#include <linux/crash_core.h>24#include <asm/sections.h>25#include <asm/kexec_ranges.h>26#include <asm/crashdump-ppc64.h>2728#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)29/**30* get_max_nr_ranges - Get the max no. of ranges crash_mem structure31* could hold, given the size allocated for it.32* @size: Allocation size of crash_mem structure.33*34* Returns the maximum no. of ranges.35*/36static inline unsigned int get_max_nr_ranges(size_t size)37{38return ((size - sizeof(struct crash_mem)) /39sizeof(struct range));40}4142/**43* get_mem_rngs_size - Get the allocated size of mem_rngs based on44* max_nr_ranges and chunk size.45* @mem_rngs: Memory ranges.46*47* Returns the maximum size of @mem_rngs.48*/49static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs)50{51size_t size;5253if (!mem_rngs)54return 0;5556size = (sizeof(struct crash_mem) +57(mem_rngs->max_nr_ranges * sizeof(struct range)));5859/*60* Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ.61* So, align to get the actual length.62*/63return ALIGN(size, MEM_RANGE_CHUNK_SZ);64}6566/**67* __add_mem_range - add a memory range to memory ranges list.68* @mem_ranges: Range list to add the memory range to.69* @base: Base address of the range to add.70* @size: Size of the memory range to add.71*72* (Re)allocates memory, if needed.73*74* Returns 0 on success, negative errno on error.75*/76static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)77{78struct crash_mem *mem_rngs = *mem_ranges;7980if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) {81mem_rngs = realloc_mem_ranges(mem_ranges);82if (!mem_rngs)83return -ENOMEM;84}8586mem_rngs->ranges[mem_rngs->nr_ranges].start = base;87mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1;88pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n",89base, base + size - 1, mem_rngs->nr_ranges);90mem_rngs->nr_ranges++;91return 0;92}9394/**95* __merge_memory_ranges - Merges the given memory ranges list.96* @mem_rngs: Range list to merge.97*98* Assumes a sorted range list.99*100* Returns nothing.101*/102static void __merge_memory_ranges(struct crash_mem *mem_rngs)103{104struct range *ranges;105int i, idx;106107if (!mem_rngs)108return;109110idx = 0;111ranges = &(mem_rngs->ranges[0]);112for (i = 1; i < mem_rngs->nr_ranges; i++) {113if (ranges[i].start <= (ranges[i-1].end + 1))114ranges[idx].end = ranges[i].end;115else {116idx++;117if (i == idx)118continue;119120ranges[idx] = ranges[i];121}122}123mem_rngs->nr_ranges = idx + 1;124}125126/* cmp_func_t callback to sort ranges with sort() */127static int rngcmp(const void *_x, const void *_y)128{129const struct range *x = _x, *y = _y;130131if (x->start > y->start)132return 1;133if (x->start < y->start)134return -1;135return 0;136}137138/**139* sort_memory_ranges - Sorts the given memory ranges list.140* @mem_rngs: Range list to sort.141* @merge: If true, merge the list after sorting.142*143* Returns nothing.144*/145void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge)146{147int i;148149if (!mem_rngs)150return;151152/* Sort the ranges in-place */153sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges,154sizeof(mem_rngs->ranges[0]), rngcmp, NULL);155156if (merge)157__merge_memory_ranges(mem_rngs);158159/* For debugging purpose */160pr_debug("Memory ranges:\n");161for (i = 0; i < mem_rngs->nr_ranges; i++) {162pr_debug("\t[%03d][%#016llx - %#016llx]\n", i,163mem_rngs->ranges[i].start,164mem_rngs->ranges[i].end);165}166}167168/**169* realloc_mem_ranges - reallocate mem_ranges with size incremented170* by MEM_RANGE_CHUNK_SZ. Frees up the old memory,171* if memory allocation fails.172* @mem_ranges: Memory ranges to reallocate.173*174* Returns pointer to reallocated memory on success, NULL otherwise.175*/176struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges)177{178struct crash_mem *mem_rngs = *mem_ranges;179unsigned int nr_ranges;180size_t size;181182size = get_mem_rngs_size(mem_rngs);183nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0;184185size += MEM_RANGE_CHUNK_SZ;186mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL);187if (!mem_rngs) {188kfree(*mem_ranges);189*mem_ranges = NULL;190return NULL;191}192193mem_rngs->nr_ranges = nr_ranges;194mem_rngs->max_nr_ranges = get_max_nr_ranges(size);195*mem_ranges = mem_rngs;196197return mem_rngs;198}199200/**201* add_mem_range - Updates existing memory range, if there is an overlap.202* Else, adds a new memory range.203* @mem_ranges: Range list to add the memory range to.204* @base: Base address of the range to add.205* @size: Size of the memory range to add.206*207* (Re)allocates memory, if needed.208*209* Returns 0 on success, negative errno on error.210*/211int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)212{213struct crash_mem *mem_rngs = *mem_ranges;214u64 mstart, mend, end;215unsigned int i;216217if (!size)218return 0;219220end = base + size - 1;221222if (!mem_rngs || !(mem_rngs->nr_ranges))223return __add_mem_range(mem_ranges, base, size);224225for (i = 0; i < mem_rngs->nr_ranges; i++) {226mstart = mem_rngs->ranges[i].start;227mend = mem_rngs->ranges[i].end;228if (base < mend && end > mstart) {229if (base < mstart)230mem_rngs->ranges[i].start = base;231if (end > mend)232mem_rngs->ranges[i].end = end;233return 0;234}235}236237return __add_mem_range(mem_ranges, base, size);238}239240#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */241242#ifdef CONFIG_KEXEC_FILE243/**244* add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.245* @mem_ranges: Range list to add the memory range(s) to.246*247* Returns 0 on success, negative errno on error.248*/249static int add_tce_mem_ranges(struct crash_mem **mem_ranges)250{251struct device_node *dn = NULL;252int ret = 0;253254for_each_node_by_type(dn, "pci") {255u64 base;256u32 size;257258ret = of_property_read_u64(dn, "linux,tce-base", &base);259ret |= of_property_read_u32(dn, "linux,tce-size", &size);260if (ret) {261/*262* It is ok to have pci nodes without tce. So, ignore263* property does not exist error.264*/265if (ret == -EINVAL) {266ret = 0;267continue;268}269break;270}271272ret = add_mem_range(mem_ranges, base, size);273if (ret)274break;275}276277of_node_put(dn);278return ret;279}280281/**282* add_initrd_mem_range - Adds initrd range to the given memory ranges list,283* if the initrd was retained.284* @mem_ranges: Range list to add the memory range to.285*286* Returns 0 on success, negative errno on error.287*/288static int add_initrd_mem_range(struct crash_mem **mem_ranges)289{290u64 base, end;291int ret;292293/* This range means something, only if initrd was retained */294if (!strstr(saved_command_line, "retain_initrd"))295return 0;296297ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base);298ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end);299if (!ret)300ret = add_mem_range(mem_ranges, base, end - base + 1);301302return ret;303}304305/**306* add_htab_mem_range - Adds htab range to the given memory ranges list,307* if it exists308* @mem_ranges: Range list to add the memory range to.309*310* Returns 0 on success, negative errno on error.311*/312static int add_htab_mem_range(struct crash_mem **mem_ranges)313{314315#ifdef CONFIG_PPC_64S_HASH_MMU316if (!htab_address)317return 0;318319return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);320#else321return 0;322#endif323}324325/**326* add_kernel_mem_range - Adds kernel text region to the given327* memory ranges list.328* @mem_ranges: Range list to add the memory range to.329*330* Returns 0 on success, negative errno on error.331*/332static int add_kernel_mem_range(struct crash_mem **mem_ranges)333{334return add_mem_range(mem_ranges, 0, __pa(_end));335}336#endif /* CONFIG_KEXEC_FILE */337338#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)339/**340* add_rtas_mem_range - Adds RTAS region to the given memory ranges list.341* @mem_ranges: Range list to add the memory range to.342*343* Returns 0 on success, negative errno on error.344*/345static int add_rtas_mem_range(struct crash_mem **mem_ranges)346{347struct device_node *dn;348u32 base, size;349int ret = 0;350351dn = of_find_node_by_path("/rtas");352if (!dn)353return 0;354355ret = of_property_read_u32(dn, "linux,rtas-base", &base);356ret |= of_property_read_u32(dn, "rtas-size", &size);357if (!ret)358ret = add_mem_range(mem_ranges, base, size);359360of_node_put(dn);361return ret;362}363364/**365* add_opal_mem_range - Adds OPAL region to the given memory ranges list.366* @mem_ranges: Range list to add the memory range to.367*368* Returns 0 on success, negative errno on error.369*/370static int add_opal_mem_range(struct crash_mem **mem_ranges)371{372struct device_node *dn;373u64 base, size;374int ret;375376dn = of_find_node_by_path("/ibm,opal");377if (!dn)378return 0;379380ret = of_property_read_u64(dn, "opal-base-address", &base);381ret |= of_property_read_u64(dn, "opal-runtime-size", &size);382if (!ret)383ret = add_mem_range(mem_ranges, base, size);384385of_node_put(dn);386return ret;387}388#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */389390#ifdef CONFIG_KEXEC_FILE391/**392* add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w393* to the given memory ranges list.394* @mem_ranges: Range list to add the memory ranges to.395*396* Returns 0 on success, negative errno on error.397*/398static int add_reserved_mem_ranges(struct crash_mem **mem_ranges)399{400int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;401struct device_node *root = of_find_node_by_path("/");402const __be32 *prop;403404prop = of_get_property(root, "reserved-ranges", &len);405n_mem_addr_cells = of_n_addr_cells(root);406n_mem_size_cells = of_n_size_cells(root);407of_node_put(root);408if (!prop)409return 0;410411cells = n_mem_addr_cells + n_mem_size_cells;412413/* Each reserved range is an (address,size) pair */414for (i = 0; i < (len / (sizeof(u32) * cells)); i++) {415u64 base, size;416417base = of_read_number(prop + (i * cells), n_mem_addr_cells);418size = of_read_number(prop + (i * cells) + n_mem_addr_cells,419n_mem_size_cells);420421ret = add_mem_range(mem_ranges, base, size);422if (ret)423break;424}425426return ret;427}428429/**430* get_reserved_memory_ranges - Get reserve memory ranges. This list includes431* memory regions that should be added to the432* memory reserve map to ensure the region is433* protected from any mischief.434* @mem_ranges: Range list to add the memory ranges to.435*436* Returns 0 on success, negative errno on error.437*/438int get_reserved_memory_ranges(struct crash_mem **mem_ranges)439{440int ret;441442ret = add_rtas_mem_range(mem_ranges);443if (ret)444goto out;445446ret = add_tce_mem_ranges(mem_ranges);447if (ret)448goto out;449450ret = add_reserved_mem_ranges(mem_ranges);451out:452if (ret)453pr_err("Failed to setup reserved memory ranges\n");454return ret;455}456457/**458* get_exclude_memory_ranges - Get exclude memory ranges. This list includes459* regions like opal/rtas, tce-table, initrd,460* kernel, htab which should be avoided while461* setting up kexec load segments.462* @mem_ranges: Range list to add the memory ranges to.463*464* Returns 0 on success, negative errno on error.465*/466int get_exclude_memory_ranges(struct crash_mem **mem_ranges)467{468int ret;469470ret = add_tce_mem_ranges(mem_ranges);471if (ret)472goto out;473474ret = add_initrd_mem_range(mem_ranges);475if (ret)476goto out;477478ret = add_htab_mem_range(mem_ranges);479if (ret)480goto out;481482ret = add_kernel_mem_range(mem_ranges);483if (ret)484goto out;485486ret = add_rtas_mem_range(mem_ranges);487if (ret)488goto out;489490ret = add_opal_mem_range(mem_ranges);491if (ret)492goto out;493494ret = add_reserved_mem_ranges(mem_ranges);495if (ret)496goto out;497498/* exclude memory ranges should be sorted for easy lookup */499sort_memory_ranges(*mem_ranges, true);500out:501if (ret)502pr_err("Failed to setup exclude memory ranges\n");503return ret;504}505506#ifdef CONFIG_CRASH_DUMP507/**508* get_usable_memory_ranges - Get usable memory ranges. This list includes509* regions like crashkernel, opal/rtas & tce-table,510* that kdump kernel could use.511* @mem_ranges: Range list to add the memory ranges to.512*513* Returns 0 on success, negative errno on error.514*/515int get_usable_memory_ranges(struct crash_mem **mem_ranges)516{517int ret;518519/*520* Early boot failure observed on guests when low memory (first memory521* block?) is not added to usable memory. So, add [0, crashk_res.end]522* instead of [crashk_res.start, crashk_res.end] to workaround it.523* Also, crashed kernel's memory must be added to reserve map to524* avoid kdump kernel from using it.525*/526ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);527if (ret)528goto out;529530ret = add_rtas_mem_range(mem_ranges);531if (ret)532goto out;533534ret = add_opal_mem_range(mem_ranges);535if (ret)536goto out;537538ret = add_tce_mem_ranges(mem_ranges);539out:540if (ret)541pr_err("Failed to setup usable memory ranges\n");542return ret;543}544#endif /* CONFIG_CRASH_DUMP */545#endif /* CONFIG_KEXEC_FILE */546547#ifdef CONFIG_CRASH_DUMP548/**549* get_crash_memory_ranges - Get crash memory ranges. This list includes550* first/crashing kernel's memory regions that551* would be exported via an elfcore.552* @mem_ranges: Range list to add the memory ranges to.553*554* Returns 0 on success, negative errno on error.555*/556int get_crash_memory_ranges(struct crash_mem **mem_ranges)557{558phys_addr_t base, end;559struct crash_mem *tmem;560u64 i;561int ret;562563for_each_mem_range(i, &base, &end) {564u64 size = end - base;565566/* Skip backup memory region, which needs a separate entry */567if (base == BACKUP_SRC_START) {568if (size > BACKUP_SRC_SIZE) {569base = BACKUP_SRC_END + 1;570size -= BACKUP_SRC_SIZE;571} else572continue;573}574575ret = add_mem_range(mem_ranges, base, size);576if (ret)577goto out;578579/* Try merging adjacent ranges before reallocation attempt */580if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)581sort_memory_ranges(*mem_ranges, true);582}583584/* Reallocate memory ranges if there is no space to split ranges */585tmem = *mem_ranges;586if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {587tmem = realloc_mem_ranges(mem_ranges);588if (!tmem)589goto out;590}591592/* Exclude crashkernel region */593ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);594if (ret)595goto out;596597/*598* FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL599* regions are exported to save their context at the time of600* crash, they should actually be backed up just like the601* first 64K bytes of memory.602*/603ret = add_rtas_mem_range(mem_ranges);604if (ret)605goto out;606607ret = add_opal_mem_range(mem_ranges);608if (ret)609goto out;610611/* create a separate program header for the backup region */612ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);613if (ret)614goto out;615616sort_memory_ranges(*mem_ranges, false);617out:618if (ret)619pr_err("Failed to setup crash memory ranges\n");620return ret;621}622623/**624* remove_mem_range - Removes the given memory range from the range list.625* @mem_ranges: Range list to remove the memory range to.626* @base: Base address of the range to remove.627* @size: Size of the memory range to remove.628*629* (Re)allocates memory, if needed.630*631* Returns 0 on success, negative errno on error.632*/633int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)634{635u64 end;636int ret = 0;637unsigned int i;638u64 mstart, mend;639struct crash_mem *mem_rngs = *mem_ranges;640641if (!size)642return 0;643644/*645* Memory range are stored as start and end address, use646* the same format to do remove operation.647*/648end = base + size - 1;649650for (i = 0; i < mem_rngs->nr_ranges; i++) {651mstart = mem_rngs->ranges[i].start;652mend = mem_rngs->ranges[i].end;653654/*655* Memory range to remove is not part of this range entry656* in the memory range list657*/658if (!(base >= mstart && end <= mend))659continue;660661/*662* Memory range to remove is equivalent to this entry in the663* memory range list. Remove the range entry from the list.664*/665if (base == mstart && end == mend) {666for (; i < mem_rngs->nr_ranges - 1; i++) {667mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;668mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;669}670mem_rngs->nr_ranges--;671goto out;672}673/*674* Start address of the memory range to remove and the675* current memory range entry in the list is same. Just676* move the start address of the current memory range677* entry in the list to end + 1.678*/679else if (base == mstart) {680mem_rngs->ranges[i].start = end + 1;681goto out;682}683/*684* End address of the memory range to remove and the685* current memory range entry in the list is same.686* Just move the end address of the current memory687* range entry in the list to base - 1.688*/689else if (end == mend) {690mem_rngs->ranges[i].end = base - 1;691goto out;692}693/*694* Memory range to remove is not at the edge of current695* memory range entry. Split the current memory entry into696* two half.697*/698else {699mem_rngs->ranges[i].end = base - 1;700size = mem_rngs->ranges[i].end - end;701ret = add_mem_range(mem_ranges, end + 1, size);702}703}704out:705return ret;706}707#endif /* CONFIG_CRASH_DUMP */708709710