Path: blob/master/arch/powerpc/platforms/pseries/phyp_dump.c
10818 views
/*1* Hypervisor-assisted dump2*3* Linas Vepstas, Manish Ahuja 20084* Copyright 2008 IBM Corp.5*6* This program is free software; you can redistribute it and/or7* modify it under the terms of the GNU General Public License8* as published by the Free Software Foundation; either version9* 2 of the License, or (at your option) any later version.10*11*/1213#include <linux/gfp.h>14#include <linux/init.h>15#include <linux/kobject.h>16#include <linux/mm.h>17#include <linux/of.h>18#include <linux/pfn.h>19#include <linux/swap.h>20#include <linux/sysfs.h>2122#include <asm/page.h>23#include <asm/phyp_dump.h>24#include <asm/machdep.h>25#include <asm/prom.h>26#include <asm/rtas.h>2728/* Variables, used to communicate data between early boot and late boot */29static struct phyp_dump phyp_dump_vars;30struct phyp_dump *phyp_dump_info = &phyp_dump_vars;3132static int ibm_configure_kernel_dump;33/* ------------------------------------------------- */34/* RTAS interfaces to declare the dump regions */3536struct dump_section {37u32 dump_flags;38u16 source_type;39u16 error_flags;40u64 source_address;41u64 source_length;42u64 length_copied;43u64 destination_address;44};4546struct phyp_dump_header {47u32 version;48u16 num_of_sections;49u16 status;5051u32 first_offset_section;52u32 dump_disk_section;53u64 block_num_dd;54u64 num_of_blocks_dd;55u32 offset_dd;56u32 maxtime_to_auto;57/* No dump disk path string used */5859struct dump_section cpu_data;60struct dump_section hpte_data;61struct dump_section kernel_data;62};6364/* The dump header *must be* in low memory, so .bss it */65static struct phyp_dump_header phdr;6667#define NUM_DUMP_SECTIONS 368#define DUMP_HEADER_VERSION 0x169#define DUMP_REQUEST_FLAG 0x170#define DUMP_SOURCE_CPU 0x000171#define DUMP_SOURCE_HPTE 0x000272#define DUMP_SOURCE_RMO 0x001173#define DUMP_ERROR_FLAG 0x200074#define DUMP_TRIGGERED 0x400075#define DUMP_PERFORMED 0x8000767778/**79* init_dump_header() - initialize the header declaring a dump80* Returns: length of dump save area.81*82* When the hypervisor saves crashed state, it needs to put83* it somewhere. The dump header tells the hypervisor where84* the data can be saved.85*/86static unsigned long init_dump_header(struct phyp_dump_header *ph)87{88unsigned long addr_offset = 0;8990/* Set up the dump header */91ph->version = DUMP_HEADER_VERSION;92ph->num_of_sections = NUM_DUMP_SECTIONS;93ph->status = 0;9495ph->first_offset_section =96(u32)offsetof(struct phyp_dump_header, cpu_data);97ph->dump_disk_section = 0;98ph->block_num_dd = 0;99ph->num_of_blocks_dd = 0;100ph->offset_dd = 0;101102ph->maxtime_to_auto = 0; /* disabled */103104/* The first two sections are mandatory */105ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;106ph->cpu_data.source_type = DUMP_SOURCE_CPU;107ph->cpu_data.source_address = 0;108ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;109ph->cpu_data.destination_address = addr_offset;110addr_offset += phyp_dump_info->cpu_state_size;111112ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;113ph->hpte_data.source_type = DUMP_SOURCE_HPTE;114ph->hpte_data.source_address = 0;115ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;116ph->hpte_data.destination_address = addr_offset;117addr_offset += phyp_dump_info->hpte_region_size;118119/* This section describes the low kernel region */120ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;121ph->kernel_data.source_type = DUMP_SOURCE_RMO;122ph->kernel_data.source_address = PHYP_DUMP_RMR_START;123ph->kernel_data.source_length = PHYP_DUMP_RMR_END;124ph->kernel_data.destination_address = addr_offset;125addr_offset += ph->kernel_data.source_length;126127return addr_offset;128}129130static void print_dump_header(const struct phyp_dump_header *ph)131{132#ifdef DEBUG133if (ph == NULL)134return;135136printk(KERN_INFO "dump header:\n");137/* setup some ph->sections required */138printk(KERN_INFO "version = %d\n", ph->version);139printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);140printk(KERN_INFO "Status = 0x%x\n", ph->status);141142/* No ph->disk, so all should be set to 0 */143printk(KERN_INFO "Offset to first section 0x%x\n",144ph->first_offset_section);145printk(KERN_INFO "dump disk sections should be zero\n");146printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);147printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);148printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);149printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);150printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);151152/*set cpu state and hpte states as well scratch pad area */153printk(KERN_INFO " CPU AREA\n");154printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags);155printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type);156printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags);157printk(KERN_INFO "cpu source_address =%llx\n",158ph->cpu_data.source_address);159printk(KERN_INFO "cpu source_length =%llx\n",160ph->cpu_data.source_length);161printk(KERN_INFO "cpu length_copied =%llx\n",162ph->cpu_data.length_copied);163164printk(KERN_INFO " HPTE AREA\n");165printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags);166printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type);167printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags);168printk(KERN_INFO "HPTE source_address =%llx\n",169ph->hpte_data.source_address);170printk(KERN_INFO "HPTE source_length =%llx\n",171ph->hpte_data.source_length);172printk(KERN_INFO "HPTE length_copied =%llx\n",173ph->hpte_data.length_copied);174175printk(KERN_INFO " SRSD AREA\n");176printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags);177printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type);178printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags);179printk(KERN_INFO "SRSD source_address =%llx\n",180ph->kernel_data.source_address);181printk(KERN_INFO "SRSD source_length =%llx\n",182ph->kernel_data.source_length);183printk(KERN_INFO "SRSD length_copied =%llx\n",184ph->kernel_data.length_copied);185#endif186}187188static ssize_t show_phyp_dump_active(struct kobject *kobj,189struct kobj_attribute *attr, char *buf)190{191192/* create filesystem entry so kdump is phyp-dump aware */193return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);194}195196static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,197show_phyp_dump_active,198NULL);199200static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)201{202int rc;203204/* Add addr value if not initialized before */205if (ph->cpu_data.destination_address == 0) {206ph->cpu_data.destination_address += addr;207ph->hpte_data.destination_address += addr;208ph->kernel_data.destination_address += addr;209}210211/* ToDo Invalidate kdump and free memory range. */212213do {214rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,2151, ph, sizeof(struct phyp_dump_header));216} while (rtas_busy_delay(rc));217218if (rc) {219printk(KERN_ERR "phyp-dump: unexpected error (%d) on "220"register\n", rc);221print_dump_header(ph);222return;223}224225rc = sysfs_create_file(kernel_kobj, &pdl.attr);226if (rc)227printk(KERN_ERR "phyp-dump: unable to create sysfs"228" file (%d)\n", rc);229}230231static232void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)233{234int rc;235236/* Add addr value if not initialized before */237if (ph->cpu_data.destination_address == 0) {238ph->cpu_data.destination_address += addr;239ph->hpte_data.destination_address += addr;240ph->kernel_data.destination_address += addr;241}242243do {244rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,2452, ph, sizeof(struct phyp_dump_header));246} while (rtas_busy_delay(rc));247248if (rc) {249printk(KERN_ERR "phyp-dump: unexpected error (%d) "250"on invalidate\n", rc);251print_dump_header(ph);252}253}254255/* ------------------------------------------------- */256/**257* release_memory_range -- release memory previously memblock_reserved258* @start_pfn: starting physical frame number259* @nr_pages: number of pages to free.260*261* This routine will release memory that had been previously262* memblock_reserved in early boot. The released memory becomes263* available for genreal use.264*/265static void release_memory_range(unsigned long start_pfn,266unsigned long nr_pages)267{268struct page *rpage;269unsigned long end_pfn;270long i;271272end_pfn = start_pfn + nr_pages;273274for (i = start_pfn; i <= end_pfn; i++) {275rpage = pfn_to_page(i);276if (PageReserved(rpage)) {277ClearPageReserved(rpage);278init_page_count(rpage);279__free_page(rpage);280totalram_pages++;281}282}283}284285/**286* track_freed_range -- Counts the range being freed.287* Once the counter goes to zero, it re-registers dump for288* future use.289*/290static void291track_freed_range(unsigned long addr, unsigned long length)292{293static unsigned long scratch_area_size, reserved_area_size;294295if (addr < phyp_dump_info->init_reserve_start)296return;297298if ((addr >= phyp_dump_info->init_reserve_start) &&299(addr <= phyp_dump_info->init_reserve_start +300phyp_dump_info->init_reserve_size))301reserved_area_size += length;302303if ((addr >= phyp_dump_info->reserved_scratch_addr) &&304(addr <= phyp_dump_info->reserved_scratch_addr +305phyp_dump_info->reserved_scratch_size))306scratch_area_size += length;307308if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&309(scratch_area_size == phyp_dump_info->reserved_scratch_size)) {310311invalidate_last_dump(&phdr,312phyp_dump_info->reserved_scratch_addr);313register_dump_area(&phdr,314phyp_dump_info->reserved_scratch_addr);315}316}317318/* ------------------------------------------------- */319/**320* sysfs_release_region -- sysfs interface to release memory range.321*322* Usage:323* "echo <start addr> <length> > /sys/kernel/release_region"324*325* Example:326* "echo 0x40000000 0x10000000 > /sys/kernel/release_region"327*328* will release 256MB starting at 1GB.329*/330static ssize_t store_release_region(struct kobject *kobj,331struct kobj_attribute *attr,332const char *buf, size_t count)333{334unsigned long start_addr, length, end_addr;335unsigned long start_pfn, nr_pages;336ssize_t ret;337338ret = sscanf(buf, "%lx %lx", &start_addr, &length);339if (ret != 2)340return -EINVAL;341342track_freed_range(start_addr, length);343344/* Range-check - don't free any reserved memory that345* wasn't reserved for phyp-dump */346if (start_addr < phyp_dump_info->init_reserve_start)347start_addr = phyp_dump_info->init_reserve_start;348349end_addr = phyp_dump_info->init_reserve_start +350phyp_dump_info->init_reserve_size;351if (start_addr+length > end_addr)352length = end_addr - start_addr;353354/* Release the region of memory assed in by user */355start_pfn = PFN_DOWN(start_addr);356nr_pages = PFN_DOWN(length);357release_memory_range(start_pfn, nr_pages);358359return count;360}361362static ssize_t show_release_region(struct kobject *kobj,363struct kobj_attribute *attr, char *buf)364{365u64 second_addr_range;366367/* total reserved size - start of scratch area */368second_addr_range = phyp_dump_info->init_reserve_size -369phyp_dump_info->reserved_scratch_size;370return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"371" DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",372phdr.cpu_data.destination_address,373phdr.cpu_data.length_copied,374phdr.hpte_data.destination_address,375phdr.hpte_data.length_copied,376phdr.kernel_data.destination_address,377phdr.kernel_data.length_copied,378phyp_dump_info->init_reserve_start,379second_addr_range);380}381382static struct kobj_attribute rr = __ATTR(release_region, 0600,383show_release_region,384store_release_region);385386static int __init phyp_dump_setup(void)387{388struct device_node *rtas;389const struct phyp_dump_header *dump_header = NULL;390unsigned long dump_area_start;391unsigned long dump_area_length;392int header_len = 0;393int rc;394395/* If no memory was reserved in early boot, there is nothing to do */396if (phyp_dump_info->init_reserve_size == 0)397return 0;398399/* Return if phyp dump not supported */400if (!phyp_dump_info->phyp_dump_configured)401return -ENOSYS;402403/* Is there dump data waiting for us? If there isn't,404* then register a new dump area, and release all of405* the rest of the reserved ram.406*407* The /rtas/ibm,kernel-dump rtas node is present only408* if there is dump data waiting for us.409*/410rtas = of_find_node_by_path("/rtas");411if (rtas) {412dump_header = of_get_property(rtas, "ibm,kernel-dump",413&header_len);414of_node_put(rtas);415}416417ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");418419print_dump_header(dump_header);420dump_area_length = init_dump_header(&phdr);421/* align down */422dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;423424if (dump_header == NULL) {425register_dump_area(&phdr, dump_area_start);426return 0;427}428429/* re-register the dump area, if old dump was invalid */430if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {431invalidate_last_dump(&phdr, dump_area_start);432register_dump_area(&phdr, dump_area_start);433return 0;434}435436if (dump_header) {437phyp_dump_info->reserved_scratch_addr =438dump_header->cpu_data.destination_address;439phyp_dump_info->reserved_scratch_size =440dump_header->cpu_data.source_length +441dump_header->hpte_data.source_length +442dump_header->kernel_data.source_length;443}444445/* Should we create a dump_subsys, analogous to s390/ipl.c ? */446rc = sysfs_create_file(kernel_kobj, &rr.attr);447if (rc)448printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",449rc);450451/* ToDo: re-register the dump area, for next time. */452return 0;453}454machine_subsys_initcall(pseries, phyp_dump_setup);455456int __init early_init_dt_scan_phyp_dump(unsigned long node,457const char *uname, int depth, void *data)458{459const unsigned int *sizes;460461phyp_dump_info->phyp_dump_configured = 0;462phyp_dump_info->phyp_dump_is_active = 0;463464if (depth != 1 || strcmp(uname, "rtas") != 0)465return 0;466467if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))468phyp_dump_info->phyp_dump_configured++;469470if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))471phyp_dump_info->phyp_dump_is_active++;472473sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",474NULL);475if (!sizes)476return 0;477478if (sizes[0] == 1)479phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);480481if (sizes[3] == 2)482phyp_dump_info->hpte_region_size =483*((unsigned long *)&sizes[4]);484return 1;485}486487/* Look for phyp_dump= cmdline option */488static int __init early_phyp_dump_enabled(char *p)489{490phyp_dump_info->phyp_dump_at_boot = 1;491492if (!p)493return 0;494495if (strncmp(p, "1", 1) == 0)496phyp_dump_info->phyp_dump_at_boot = 1;497else if (strncmp(p, "0", 1) == 0)498phyp_dump_info->phyp_dump_at_boot = 0;499500return 0;501}502early_param("phyp_dump", early_phyp_dump_enabled);503504/* Look for phyp_dump_reserve_size= cmdline option */505static int __init early_phyp_dump_reserve_size(char *p)506{507if (p)508phyp_dump_info->reserve_bootvar = memparse(p, &p);509510return 0;511}512early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);513514515