/*P:4001* This contains run_guest() which actually calls into the Host<->Guest2* Switcher and analyzes the return, such as determining if the Guest wants the3* Host to do something. This file also contains useful helper routines.4:*/5#include <linux/module.h>6#include <linux/stringify.h>7#include <linux/stddef.h>8#include <linux/io.h>9#include <linux/mm.h>10#include <linux/vmalloc.h>11#include <linux/cpu.h>12#include <linux/freezer.h>13#include <linux/highmem.h>14#include <linux/slab.h>15#include <asm/paravirt.h>16#include <asm/pgtable.h>17#include <asm/uaccess.h>18#include <asm/poll.h>19#include <asm/asm-offsets.h>20#include "lg.h"212223static struct vm_struct *switcher_vma;24static struct page **switcher_page;2526/* This One Big lock protects all inter-guest data structures. */27DEFINE_MUTEX(lguest_lock);2829/*H:01030* We need to set up the Switcher at a high virtual address. Remember the31* Switcher is a few hundred bytes of assembler code which actually changes the32* CPU to run the Guest, and then changes back to the Host when a trap or33* interrupt happens.34*35* The Switcher code must be at the same virtual address in the Guest as the36* Host since it will be running as the switchover occurs.37*38* Trying to map memory at a particular address is an unusual thing to do, so39* it's not a simple one-liner.40*/41static __init int map_switcher(void)42{43int i, err;44struct page **pagep;4546/*47* Map the Switcher in to high memory.48*49* It turns out that if we choose the address 0xFFC00000 (4MB under the50* top virtual address), it makes setting up the page tables really51* easy.52*/5354/*55* We allocate an array of struct page pointers. map_vm_area() wants56* this, rather than just an array of pages.57*/58switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES,59GFP_KERNEL);60if (!switcher_page) {61err = -ENOMEM;62goto out;63}6465/*66* Now we actually allocate the pages. The Guest will see these pages,67* so we make sure they're zeroed.68*/69for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {70switcher_page[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);71if (!switcher_page[i]) {72err = -ENOMEM;73goto free_some_pages;74}75}7677/*78* First we check that the Switcher won't overlap the fixmap area at79* the top of memory. It's currently nowhere near, but it could have80* very strange effects if it ever happened.81*/82if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){83err = -ENOMEM;84printk("lguest: mapping switcher would thwack fixmap\n");85goto free_pages;86}8788/*89* Now we reserve the "virtual memory area" we want: 0xFFC0000090* (SWITCHER_ADDR). We might not get it in theory, but in practice91* it's worked so far. The end address needs +1 because __get_vm_area92* allocates an extra guard page, so we need space for that.93*/94switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,95VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR96+ (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);97if (!switcher_vma) {98err = -ENOMEM;99printk("lguest: could not map switcher pages high\n");100goto free_pages;101}102103/*104* This code actually sets up the pages we've allocated to appear at105* SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the106* kind of pages we're mapping (kernel pages), and a pointer to our107* array of struct pages. It increments that pointer, but we don't108* care.109*/110pagep = switcher_page;111err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);112if (err) {113printk("lguest: map_vm_area failed: %i\n", err);114goto free_vma;115}116117/*118* Now the Switcher is mapped at the right address, we can't fail!119* Copy in the compiled-in Switcher code (from <arch>_switcher.S).120*/121memcpy(switcher_vma->addr, start_switcher_text,122end_switcher_text - start_switcher_text);123124printk(KERN_INFO "lguest: mapped switcher at %p\n",125switcher_vma->addr);126/* And we succeeded... */127return 0;128129free_vma:130vunmap(switcher_vma->addr);131free_pages:132i = TOTAL_SWITCHER_PAGES;133free_some_pages:134for (--i; i >= 0; i--)135__free_pages(switcher_page[i], 0);136kfree(switcher_page);137out:138return err;139}140/*:*/141142/* Cleaning up the mapping when the module is unloaded is almost... too easy. */143static void unmap_switcher(void)144{145unsigned int i;146147/* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */148vunmap(switcher_vma->addr);149/* Now we just need to free the pages we copied the switcher into */150for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)151__free_pages(switcher_page[i], 0);152kfree(switcher_page);153}154155/*H:032156* Dealing With Guest Memory.157*158* Before we go too much further into the Host, we need to grok the routines159* we use to deal with Guest memory.160*161* When the Guest gives us (what it thinks is) a physical address, we can use162* the normal copy_from_user() & copy_to_user() on the corresponding place in163* the memory region allocated by the Launcher.164*165* But we can't trust the Guest: it might be trying to access the Launcher166* code. We have to check that the range is below the pfn_limit the Launcher167* gave us. We have to make sure that addr + len doesn't give us a false168* positive by overflowing, too.169*/170bool lguest_address_ok(const struct lguest *lg,171unsigned long addr, unsigned long len)172{173return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);174}175176/*177* This routine copies memory from the Guest. Here we can see how useful the178* kill_lguest() routine we met in the Launcher can be: we return a random179* value (all zeroes) instead of needing to return an error.180*/181void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)182{183if (!lguest_address_ok(cpu->lg, addr, bytes)184|| copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) {185/* copy_from_user should do this, but as we rely on it... */186memset(b, 0, bytes);187kill_guest(cpu, "bad read address %#lx len %u", addr, bytes);188}189}190191/* This is the write (copy into Guest) version. */192void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,193unsigned bytes)194{195if (!lguest_address_ok(cpu->lg, addr, bytes)196|| copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0)197kill_guest(cpu, "bad write address %#lx len %u", addr, bytes);198}199/*:*/200201/*H:030202* Let's jump straight to the the main loop which runs the Guest.203* Remember, this is called by the Launcher reading /dev/lguest, and we keep204* going around and around until something interesting happens.205*/206int run_guest(struct lg_cpu *cpu, unsigned long __user *user)207{208/* We stop running once the Guest is dead. */209while (!cpu->lg->dead) {210unsigned int irq;211bool more;212213/* First we run any hypercalls the Guest wants done. */214if (cpu->hcall)215do_hypercalls(cpu);216217/*218* It's possible the Guest did a NOTIFY hypercall to the219* Launcher.220*/221if (cpu->pending_notify) {222/*223* Does it just needs to write to a registered224* eventfd (ie. the appropriate virtqueue thread)?225*/226if (!send_notify_to_eventfd(cpu)) {227/* OK, we tell the main Laucher. */228if (put_user(cpu->pending_notify, user))229return -EFAULT;230return sizeof(cpu->pending_notify);231}232}233234/* Check for signals */235if (signal_pending(current))236return -ERESTARTSYS;237238/*239* Check if there are any interrupts which can be delivered now:240* if so, this sets up the hander to be executed when we next241* run the Guest.242*/243irq = interrupt_pending(cpu, &more);244if (irq < LGUEST_IRQS)245try_deliver_interrupt(cpu, irq, more);246247/*248* All long-lived kernel loops need to check with this horrible249* thing called the freezer. If the Host is trying to suspend,250* it stops us.251*/252try_to_freeze();253254/*255* Just make absolutely sure the Guest is still alive. One of256* those hypercalls could have been fatal, for example.257*/258if (cpu->lg->dead)259break;260261/*262* If the Guest asked to be stopped, we sleep. The Guest's263* clock timer will wake us.264*/265if (cpu->halted) {266set_current_state(TASK_INTERRUPTIBLE);267/*268* Just before we sleep, make sure no interrupt snuck in269* which we should be doing.270*/271if (interrupt_pending(cpu, &more) < LGUEST_IRQS)272set_current_state(TASK_RUNNING);273else274schedule();275continue;276}277278/*279* OK, now we're ready to jump into the Guest. First we put up280* the "Do Not Disturb" sign:281*/282local_irq_disable();283284/* Actually run the Guest until something happens. */285lguest_arch_run_guest(cpu);286287/* Now we're ready to be interrupted or moved to other CPUs */288local_irq_enable();289290/* Now we deal with whatever happened to the Guest. */291lguest_arch_handle_trap(cpu);292}293294/* Special case: Guest is 'dead' but wants a reboot. */295if (cpu->lg->dead == ERR_PTR(-ERESTART))296return -ERESTART;297298/* The Guest is dead => "No such file or directory" */299return -ENOENT;300}301302/*H:000303* Welcome to the Host!304*305* By this point your brain has been tickled by the Guest code and numbed by306* the Launcher code; prepare for it to be stretched by the Host code. This is307* the heart. Let's begin at the initialization routine for the Host's lg308* module.309*/310static int __init init(void)311{312int err;313314/* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */315if (paravirt_enabled()) {316printk("lguest is afraid of being a guest\n");317return -EPERM;318}319320/* First we put the Switcher up in very high virtual memory. */321err = map_switcher();322if (err)323goto out;324325/* Now we set up the pagetable implementation for the Guests. */326err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES);327if (err)328goto unmap;329330/* We might need to reserve an interrupt vector. */331err = init_interrupts();332if (err)333goto free_pgtables;334335/* /dev/lguest needs to be registered. */336err = lguest_device_init();337if (err)338goto free_interrupts;339340/* Finally we do some architecture-specific setup. */341lguest_arch_host_init();342343/* All good! */344return 0;345346free_interrupts:347free_interrupts();348free_pgtables:349free_pagetables();350unmap:351unmap_switcher();352out:353return err;354}355356/* Cleaning up is just the same code, backwards. With a little French. */357static void __exit fini(void)358{359lguest_device_remove();360free_interrupts();361free_pagetables();362unmap_switcher();363364lguest_arch_host_fini();365}366/*:*/367368/*369* The Host side of lguest can be a module. This is a nice way for people to370* play with it.371*/372module_init(init);373module_exit(fini);374MODULE_LICENSE("GPL");375MODULE_AUTHOR("Rusty Russell <[email protected]>");376377378