GitHub Repository: awilliam/linux-vfio
Path: blob/master/kernel/kexec.c
/*
 * kexec.c - kexec system call
 * Copyright (C) 2002-2004 Eric Biederman <[email protected]>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <generated/utsrelease.h>
#include <linux/utsname.h>
#include <linux/numa.h>
#include <linux/suspend.h>
#include <linux/device.h>
#include <linux/freezer.h>
#include <linux/pm.h>
#include <linux/cpu.h>
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/kmsg_dump.h>
#include <linux/syscore_ops.h>

#include <asm/page.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/system.h>
#include <asm/sections.h>
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);

/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};

int kexec_should_crash(struct task_struct *p)
{
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses.  On processors
 * where you can disable the MMU this is trivial and easy.  For
 * others it is still a simple predictable page table to setup.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place.  This means I can only support memory whose
 * physical address can fit in an unsigned long.  In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the
 * new kernel is placed in the control_code_buffer, whose size
 * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
 * page of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages.  As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it).  The end product of this is that most of the
 * physical address space, and most of RAM can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */

/*
 * KIMAGE_NO_DEST is an impossible destination address..., for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

static int kimage_is_destination_range(struct kimage *image,
				       unsigned long start, unsigned long end);
static struct page *kimage_alloc_page(struct kimage *image,
				      gfp_t gfp_mask,
				      unsigned long dest);

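/*
 * Allocate a kimage, copy in the user-supplied segment list, and
 * sanity check it: destination addresses must be page aligned,
 * below the architecture's destination limit, non-overlapping, and
 * each segment's buffer must fit within its memory size.
 */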
static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
			   unsigned long nr_segments,
			   struct kexec_segment __user *segments)
{
	size_t segment_bytes;
	struct kimage *image;
	unsigned long i;
	int result;

	/* Allocate a controlling structure */
	result = -ENOMEM;
	image = kzalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		goto out;

	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->start = entry;
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unusable pages */
	INIT_LIST_HEAD(&image->unuseable_pages);

	/* Read in the segments */
	image->nr_segments = nr_segments;
	segment_bytes = nr_segments * sizeof(*segments);
	result = copy_from_user(image->segment, segments, segment_bytes);
	if (result) {
		result = -EFAULT;
		goto out;
	}

	/*
	 * Verify we have good destination addresses.  The caller is
	 * responsible for making certain we don't attempt to load
	 * the new image into invalid or reserved areas of RAM.  This
	 * just verifies it is an address we can use.
	 *
	 * Since the kernel does everything in page size chunks ensure
	 * the destination addresses are page aligned.  Too many
	 * special cases crop up when we don't do this.  The most
	 * insidious is getting overlapping destination addresses
	 * simply because addresses are changed to page size
	 * granularity.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
			goto out;
		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
			goto out;
	}

	/* Verify our destination addresses do not overlap.
	 * If we allowed overlapping destination addresses
	 * through, very weird things can happen with no
	 * easy explanation as one segment stops on another.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;
		unsigned long j;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		for (j = 0; j < i; j++) {
			unsigned long pstart, pend;
			pstart = image->segment[j].mem;
			pend   = pstart + image->segment[j].memsz;
			/* Do the segments overlap ? */
			if ((mend > pstart) && (mstart < pend))
				goto out;
		}
	}

	/* Ensure our buffer sizes are strictly less than
	 * our memory sizes.  This should always be the case,
	 * and it is easier to check up front than to be surprised
	 * later on.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		if (image->segment[i].bufsz > image->segment[i].memsz)
			goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;

}

static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
			       unsigned long nr_segments,
			       struct kexec_segment __user *segments)
{
	int result;
	struct kimage *image;

	/* Allocate and initialize a controlling structure */
	image = NULL;
	result = do_kimage_alloc(&image, entry, nr_segments, segments);
	if (result)
		goto out;

	*rimage = image;

	/*
	 * Find a location for the control code buffer, and add it
	 * to the vector of segments so that its pages will also be
	 * counted as destination pages.
	 */
	result = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					   get_order(KEXEC_CONTROL_PAGE_SIZE));
	if (!image->control_code_page) {
		printk(KERN_ERR "Could not allocate control_code_buffer\n");
		goto out;
	}

	image->swap_page = kimage_alloc_control_pages(image, 0);
	if (!image->swap_page) {
		printk(KERN_ERR "Could not allocate swap buffer\n");
		goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;
}

static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
			      unsigned long nr_segments,
			      struct kexec_segment __user *segments)
{
	int result;
	struct kimage *image;
	unsigned long i;

	image = NULL;
	/* Verify we have a valid entry point */
	if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
		result = -EADDRNOTAVAIL;
		goto out;
	}

	/* Allocate and initialize a controlling structure */
	result = do_kimage_alloc(&image, entry, nr_segments, segments);
	if (result)
		goto out;

	/* Enable the special crash kernel control page
	 * allocation policy.
	 */
	image->control_page = crashk_res.start;
	image->type = KEXEC_TYPE_CRASH;

	/*
	 * Verify we have good destination addresses.  Normally
	 * the caller is responsible for making certain we don't
	 * attempt to load the new image into invalid or reserved
	 * areas of RAM.  But crash kernels are preloaded into a
	 * reserved area of RAM.  We must ensure the addresses
	 * are in the reserved area otherwise preloading the
	 * kernel could corrupt things.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz - 1;
		/* Ensure we are within the crash kernel limits */
		if ((mstart < crashk_res.start) || (mend > crashk_res.end))
			goto out;
	}

	/*
	 * Find a location for the control code buffer, and add it
	 * to the vector of segments so that its pages will also be
	 * counted as destination pages.
	 */
	result = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					   get_order(KEXEC_CONTROL_PAGE_SIZE));
	if (!image->control_code_page) {
		printk(KERN_ERR "Could not allocate control_code_buffer\n");
		goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;
}

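/*
 * Return 1 if any part of [start, end) falls within the destination
 * range of one of the image's segments, 0 otherwise.
 */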
static int kimage_is_destination_range(struct kimage *image,
					unsigned long start,
					unsigned long end)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((end > mstart) && (start < mend))
			return 1;
	}

	return 0;
}

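/*
 * Allocate 2^order pages for kexec's private use: the page order is
 * stashed in page_private() and every page is marked reserved so the
 * rest of the kernel leaves them alone.  kimage_free_pages() undoes
 * exactly that before handing the pages back to the allocator.
 */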
static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask, order);
	if (pages) {
		unsigned int count, i;
		pages->mapping = NULL;
		set_page_private(pages, order);
		count = 1 << order;
		for (i = 0; i < count; i++)
			SetPageReserved(pages + i);
	}

	return pages;
}

static void kimage_free_pages(struct page *page)
{
	unsigned int order, count, i;

	order = page_private(page);
	count = 1 << order;
	for (i = 0; i < count; i++)
		ClearPageReserved(page + i);
	__free_pages(page, order);
}

static void kimage_free_page_list(struct list_head *list)
{
	struct list_head *pos, *next;

	list_for_each_safe(pos, next, list) {
		struct page *page;

		page = list_entry(pos, struct page, lru);
		list_del(&page->lru);
		kimage_free_pages(page);
	}
}

static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
							unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * At worst this runs in O(N) of the image size.
	 */
	struct list_head extra_pages;
	struct page *pages;
	unsigned int count;

	count = 1 << order;
	INIT_LIST_HEAD(&extra_pages);

	/* Loop while I can allocate a page and the page allocated
	 * is a destination page.
	 */
	do {
		unsigned long pfn, epfn, addr, eaddr;

		pages = kimage_alloc_pages(GFP_KERNEL, order);
		if (!pages)
			break;
		pfn   = page_to_pfn(pages);
		epfn  = pfn + count;
		addr  = pfn << PAGE_SHIFT;
		eaddr = epfn << PAGE_SHIFT;
		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
			      kimage_is_destination_range(image, addr, eaddr)) {
			list_add(&pages->lru, &extra_pages);
			pages = NULL;
		}
	} while (!pages);

	if (pages) {
		/* Remember the allocated page... */
		list_add(&pages->lru, &image->control_pages);

		/* Because the page is already in its destination
		 * location we will never allocate another page at
		 * that address.  Therefore kimage_alloc_pages
		 * will not return it (again) and we don't need
		 * to give it an entry in image->segment[].
		 */
	}
	/* Deal with the destination pages I have inadvertently allocated.
	 *
	 * Ideally I would convert multi-page allocations into single
	 * page allocations, and add everything to image->dest_pages.
	 *
	 * For now it is simpler to just free the pages.
	 */
	kimage_free_page_list(&extra_pages);

	return pages;
}

static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
						      unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * Control pages are also the only pages we must allocate
	 * when loading a crash kernel.  All of the other pages
	 * are specified by the segments and we just memcpy
	 * into them directly.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * Given the low demand this implements a very simple
	 * allocator that finds the first hole of the appropriate
	 * size in the reserved memory region, and allocates all
	 * of the memory up to and including the hole.
	 */
	unsigned long hole_start, hole_end, size;
	struct page *pages;

	pages = NULL;
	size = (1 << order) << PAGE_SHIFT;
	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
	hole_end   = hole_start + size - 1;
	while (hole_end <= crashk_res.end) {
		unsigned long i;

		if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
			break;
		if (hole_end > crashk_res.end)
			break;
		/* See if I overlap any of the segments */
		for (i = 0; i < image->nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend   = mstart + image->segment[i].memsz - 1;
			if ((hole_end >= mstart) && (hole_start <= mend)) {
				/* Advance the hole to the end of the segment */
				hole_start = (mend + (size - 1)) & ~(size - 1);
				hole_end   = hole_start + size - 1;
				break;
			}
		}
		/* If I don't overlap any segments I have found my hole! */
		if (i == image->nr_segments) {
			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
			break;
		}
	}
	if (pages)
		image->control_page = hole_end;

	return pages;
}


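/*
 * Dispatch to the normal or crash-reserved control page allocator
 * depending on the image type.
 */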
struct page *kimage_alloc_control_pages(struct kimage *image,
					 unsigned int order)
{
	struct page *pages = NULL;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		pages = kimage_alloc_normal_control_pages(image, order);
		break;
	case KEXEC_TYPE_CRASH:
		pages = kimage_alloc_crash_control_pages(image, order);
		break;
	}

	return pages;
}

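/*
 * Append an entry to the image's list of kexec page descriptors,
 * chaining in a freshly allocated indirection page whenever the
 * current one fills up.  The list is kept 0-terminated.
 */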
static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		kimage_entry_t *ind_page;
		struct page *page;

		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!page)
			return -ENOMEM;

		ind_page = page_address(page);
		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
		image->entry = ind_page;
		image->last_entry = ind_page +
				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}
	*image->entry = entry;
	image->entry++;
	*image->entry = 0;

	return 0;
}

static int kimage_set_destination(struct kimage *image,
				   unsigned long destination)
{
	int result;

	destination &= PAGE_MASK;
	result = kimage_add_entry(image, destination | IND_DESTINATION);
	if (result == 0)
		image->destination = destination;

	return result;
}


static int kimage_add_page(struct kimage *image, unsigned long page)
{
	int result;

	page &= PAGE_MASK;
	result = kimage_add_entry(image, page | IND_SOURCE);
	if (result == 0)
		image->destination += PAGE_SIZE;

	return result;
}


static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unuseable_pages);

}
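
/*
 * Close the descriptor list with an IND_DONE entry so the relocation
 * stub knows where to stop.
 */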
static void kimage_terminate(struct kimage *image)
{
	if (*image->entry != 0)
		image->entry++;

	*image->entry = IND_DONE;
}

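/*
 * Walk every entry in the image's descriptor list, following
 * indirection pages transparently and stopping at IND_DONE.
 */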
#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION)? \
			phys_to_virt((entry & PAGE_MASK)): ptr +1)

static void kimage_free_entry(kimage_entry_t entry)
{
	struct page *page;

	page = pfn_to_page(entry >> PAGE_SHIFT);
	kimage_free_pages(page);
}

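/*
 * Tear down a kimage: free every source page and indirection page
 * referenced by the descriptor list, run the architecture cleanup
 * hook, release the control pages and finally the kimage itself.
 */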
static void kimage_free(struct kimage *image)
{
	kimage_entry_t *ptr, entry;
	kimage_entry_t ind = 0;

	if (!image)
		return;

	kimage_free_extra_pages(image);
	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_INDIRECTION) {
			/* Free the previous indirection page */
			if (ind & IND_INDIRECTION)
				kimage_free_entry(ind);
			/* Save this indirection page until we are
			 * done with it.
			 */
			ind = entry;
		}
		else if (entry & IND_SOURCE)
			kimage_free_entry(entry);
	}
	/* Free the final indirection page */
	if (ind & IND_INDIRECTION)
		kimage_free_entry(ind);

	/* Handle any machine specific cleanup */
	machine_kexec_cleanup(image);

	/* Free the kexec control pages... */
	kimage_free_page_list(&image->control_pages);
	kfree(image);
}

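/*
 * Scan the descriptor list and return a pointer to the IND_SOURCE
 * entry whose destination is @page, or NULL if that destination has
 * not been claimed by any source page yet.
 */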
static kimage_entry_t *kimage_dst_used(struct kimage *image,
					unsigned long page)
{
	kimage_entry_t *ptr, entry;
	unsigned long destination = 0;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			destination = entry & PAGE_MASK;
		else if (entry & IND_SOURCE) {
			if (page == destination)
				return ptr;
			destination += PAGE_SIZE;
		}
	}

	return NULL;
}

static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time.  If the runtime is a problem the data structures can
	 * be fixed.
	 */
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used, file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unuseable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want, use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page, use it */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page.  And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			/* If so move it */
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it if its
			 * gfp_flags honor the ones passed in.
			 */
			if (!(gfp_mask & __GFP_HIGHMEM) &&
			    PageHighMem(old_page)) {
				kimage_free_pages(old_page);
				continue;
			}
			addr = old_addr;
			page = old_page;
			break;
		}
		else {
			/* Place the page on the destination list;
			 * I will use it later.
			 */
			list_add(&page->lru, &image->dest_pages);
		}
	}

	return page;
}

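/*
 * Copy one segment from user space into freshly allocated pages and
 * record each page in the descriptor list.  Any memsz beyond bufsz
 * is left zeroed, since every page is cleared before being filled.
 */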
static int kimage_load_normal_segment(struct kimage *image,
					 struct kexec_segment *segment)
{
	unsigned long maddr;
	unsigned long ubytes, mbytes;
	int result;
	unsigned char __user *buf;

	result = 0;
	buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result  = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image, page_to_pfn(page)
								<< PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		clear_page(ptr);
		ptr += maddr & ~PAGE_MASK;
		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
		if (mchunk > mbytes)
			mchunk = mbytes;

		uchunk = mchunk;
		if (uchunk > ubytes)
			uchunk = ubytes;

		result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		buf    += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
					struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	unsigned long ubytes, mbytes;
	int result;
	unsigned char __user *buf;

	result = 0;
	buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result  = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
		if (mchunk > mbytes)
			mchunk = mbytes;

		uchunk = mchunk;
		if (uchunk > ubytes) {
			uchunk = ubytes;
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}
		result = copy_from_user(ptr, buf, uchunk);
		kexec_flush_icache_page(page);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		buf    += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

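/*
 * Load a single segment, writing into newly allocated pages for a
 * normal image or directly into the reserved region for a crash image.
 */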
static int kimage_load_segment(struct kimage *image,
				struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing ongoing DMAs and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination, and
 *   jumps into the image at entry.
 *
 * kexec does not sync, or unmount filesystems so if you need
 * that to happen you need to do that yourself.
 */
struct kimage *kexec_image;
struct kimage *kexec_crash_image;

static DEFINE_MUTEX(kexec_mutex);

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
		struct kexec_segment __user *, segments, unsigned long, flags)
{
	struct kimage **dest_image, *image;
	int result;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/*
	 * Verify we have a legal set of flags.
	 * This leaves us room for future extensions.
	 */
	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
		return -EINVAL;

	/* Verify we are on the appropriate architecture */
	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
		return -EINVAL;

	/* Put an artificial cap on the number
	 * of segments passed to kexec_load.
	 */
	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	image = NULL;
	result = 0;

	/* Because we write directly to the reserved memory
	 * region when loading crash kernels we need a mutex here to
	 * prevent multiple crash kernels from attempting to load
	 * simultaneously, and to prevent a crash kernel from loading
	 * over the top of an in-use crash kernel.
	 *
	 * KISS: always take the mutex.
	 */
	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;

	dest_image = &kexec_image;
	if (flags & KEXEC_ON_CRASH)
		dest_image = &kexec_crash_image;
	if (nr_segments > 0) {
		unsigned long i;

		/* Loading another kernel to reboot into */
		if ((flags & KEXEC_ON_CRASH) == 0)
			result = kimage_normal_alloc(&image, entry,
							nr_segments, segments);
		/* Loading another kernel to switch to if this one crashes */
		else if (flags & KEXEC_ON_CRASH) {
			/* Free any current crash dump kernel before
			 * we corrupt it.
			 */
			kimage_free(xchg(&kexec_crash_image, NULL));
			result = kimage_crash_alloc(&image, entry,
						     nr_segments, segments);
		}
		if (result)
			goto out;

		if (flags & KEXEC_PRESERVE_CONTEXT)
			image->preserve_context = 1;
		result = machine_kexec_prepare(image);
		if (result)
			goto out;

		for (i = 0; i < nr_segments; i++) {
			result = kimage_load_segment(image, &image->segment[i]);
			if (result)
				goto out;
		}
		kimage_terminate(image);
	}
	/* Install the new kernel, and uninstall the old */
	image = xchg(dest_image, image);

out:
	mutex_unlock(&kexec_mutex);
	kimage_free(image);

	return result;
}

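/*
 * 32-bit compat entry point: convert each compat_kexec_segment into a
 * native kexec_segment in a user-space scratch buffer and hand off to
 * sys_kexec_load.
 */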
#ifdef CONFIG_COMPAT
asmlinkage long compat_sys_kexec_load(unsigned long entry,
				unsigned long nr_segments,
				struct compat_kexec_segment __user *segments,
				unsigned long flags)
{
	struct compat_kexec_segment in;
	struct kexec_segment out, __user *ksegments;
	unsigned long i, result;

	/* Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
	for (i = 0; i < nr_segments; i++) {
		result = copy_from_user(&in, &segments[i], sizeof(in));
		if (result)
			return -EFAULT;

		out.buf   = compat_ptr(in.buf);
		out.bufsz = in.bufsz;
		out.mem   = in.mem;
		out.memsz = in.memsz;

		result = copy_to_user(&ksegments[i], &out, sizeof(out));
		if (result)
			return -EFAULT;
	}

	return sys_kexec_load(entry, nr_segments, ksegments, flags);
}
#endif

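/*
 * Called on the panic path: if a crash kernel has been loaded, save
 * the register and vmcoreinfo state and switch straight into it.
 */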
void crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			kmsg_dump(KMSG_DUMP_KEXEC);

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
	}
}

size_t crash_get_memory_size(void)
{
	size_t size = 0;
	mutex_lock(&kexec_mutex);
	if (crashk_res.end != crashk_res.start)
		size = crashk_res.end - crashk_res.start + 1;
	mutex_unlock(&kexec_mutex);
	return size;
}

void __weak crash_free_reserved_phys_range(unsigned long begin,
					   unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
		init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
		free_page((unsigned long)__va(addr));
		totalram_pages++;
	}
}

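/*
 * Shrink the reserved crash kernel region to new_size bytes, giving
 * the freed tail back to the page allocator.  Fails with -ENOENT if a
 * crash kernel is currently loaded.
 */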
int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long start, end;

	mutex_lock(&kexec_mutex);

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}
	start = crashk_res.start;
	end = crashk_res.end;

	if (new_size >= end - start + 1) {
		ret = -EINVAL;
		if (new_size == end - start + 1)
			ret = 0;
		goto unlock;
	}

	start = roundup(start, PAGE_SIZE);
	end = roundup(start + new_size, PAGE_SIZE);

	crash_free_reserved_phys_range(end, crashk_res.end);

	if ((start == end) && (crashk_res.parent != NULL))
		release_resource(&crashk_res);
	crashk_res.end = end - 1;

unlock:
	mutex_unlock(&kexec_mutex);
	return ret;
}

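/*
 * Write an ELF note (header, name, payload) into the buffer and return
 * a pointer just past it; each field is padded to a 4-byte boundary.
 */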
static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type   = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type   = 0;
	memcpy(buf, &note, sizeof(note));
}

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	crash_notes = alloc_percpu(note_buf_t);
	if (!crash_notes) {
		printk("Kexec: Memory allocation for saving cpu register"
		       " states failed\n");
		return -ENOMEM;
	}
	return 0;
}
module_init(crash_notes_memory_init)


/*
 * parsing the "crashkernel" commandline
 *
 * this code is intended to be called from architecture specific code
 */


/*
 * This function parses command lines in the format
 *
 *	crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
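/*
 * For example, "crashkernel=512M-2G:64M,2G-:128M" reserves 64M when
 * the system has between 512M and 2G of RAM and 128M when it has 2G
 * or more; an optional "@offset" suffix requests a fixed base address
 * for the reservation.
 */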
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warning("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warning("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warning("crashkernel: Memory "
						"value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warning("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warning("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warning("Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= system_ram) {
			pr_warning("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (system_ram >= start && system_ram < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warning("Memory value expected "
						"after '@'\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}

/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warning("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);

	return 0;
}

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	char *p = cmdline, *ck_cmdline = NULL;
	char *first_colon, *first_space;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, "crashkernel=");
	while (p) {
		ck_cmdline = p;
		p = strstr(p+1, "crashkernel=");
	}

	if (!ck_cmdline)
		return -EINVAL;

	ck_cmdline += 12; /* strlen("crashkernel=") */

	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
				crash_size, crash_base);
	else
		return parse_crashkernel_simple(ck_cmdline, crash_size,
				crash_base);

	return 0;
}


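/*
 * Stamp the current time into the vmcoreinfo data and wrap it in an
 * ELF note so the dump-capture kernel can locate it.
 */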
void crash_save_vmcoreinfo(void)
{
	u32 *buf;

	if (!vmcoreinfo_size)
		return;

	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());

	buf = (u32 *)vmcoreinfo_note;

	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);

	final_note(buf);
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	int r;

	va_start(args, fmt);
	r = vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	if (r + vmcoreinfo_size > vmcoreinfo_max_size)
		r = vmcoreinfo_max_size - vmcoreinfo_size;

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
{}

unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
{
	return __pa((unsigned long)(char *)&vmcoreinfo_note;
}

static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
	VMCOREINFO_SYMBOL(swapper_pg_dir);
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmlist);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vm_struct, addr);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);

	arch_crash_save_vmcoreinfo();

	return 0;
}

module_init(crash_save_vmcoreinfo_init)

/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		mutex_lock(&pm_mutex);
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_noirq(). We *must* call
		 * dpm_suspend_noirq() now. Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_noirq(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		kernel_restart_prepare(NULL);
		printk(KERN_EMERG "Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_noirq(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		mutex_unlock(&pm_mutex);
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}