GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/lib/code-patching.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2008 Michael Ellerman, IBM Corporation.
 */

#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/cpuhotplug.h>
#include <linux/uaccess.h>
#include <linux/jump_label.h>

#include <asm/debug.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/text-patching.h>
#include <asm/inst.h>
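
/*
 * Core write primitive: store the instruction word (or, on PPC64, the
 * doubleword of a prefixed instruction) at patch_addr, then dcbst/icbi so
 * that instruction fetch from exec_addr sees the update. patch_addr may be
 * a temporary alias mapping of exec_addr, or exec_addr itself.
 */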
static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword)
{
	if (!IS_ENABLED(CONFIG_PPC64) || likely(!is_dword)) {
		/* For big endian correctness: plain address would use the wrong half */
		u32 val32 = val;

		__put_kernel_nofault(patch_addr, &val32, u32, failed);
	} else {
		__put_kernel_nofault(patch_addr, &val, u64, failed);
	}

	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
							    "r" (exec_addr));

	return 0;

failed:
	mb(); /* sync */
	return -EPERM;
}

int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
{
	if (ppc_inst_prefixed(instr))
		return __patch_mem(addr, ppc_inst_as_ulong(instr), addr, true);
	else
		return __patch_mem(addr, ppc_inst_val(instr), addr, false);
}
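
/*
 * Per-CPU patching context: a reserved vmalloc area (vm_struct) or, for
 * the temporary-mm scheme, a private mm, plus the poke address and its
 * pre-allocated kernel PTE.
 */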
struct patch_context {
	union {
		struct vm_struct *area;
		struct mm_struct *mm;
	};
	unsigned long addr;
	pte_t *pte;
};

static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);

static int map_patch_area(void *addr, unsigned long text_poke_addr);
static void unmap_patch_area(unsigned long addr);

static bool mm_patch_enabled(void)
{
	return IS_ENABLED(CONFIG_SMP) && radix_enabled();
}

/*
 * The following applies for Radix MMU. Hash MMU has different requirements,
 * and so is not supported.
 *
 * Changing mm requires context synchronising instructions on both sides of
 * the context switch, as well as a hwsync between the last instruction whose
 * associated storage access was translated using the current context and the
 * context switch itself.
 *
 * switch_mm_irqs_off() performs an isync after the context switch. It is
 * the responsibility of the caller to perform the CSI and hwsync before
 * starting/stopping the temp mm.
 */
static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
{
	struct mm_struct *orig_mm = current->active_mm;

	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(orig_mm, temp_mm, current);

	WARN_ON(!mm_is_thread_local(temp_mm));

	suspend_breakpoints();
	return orig_mm;
}

static void stop_using_temp_mm(struct mm_struct *temp_mm,
			       struct mm_struct *orig_mm)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(temp_mm, orig_mm, current);
	restore_breakpoints();
}
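
/*
 * CPU hotplug "up" handler for the vmalloc-area scheme: reserve one page
 * of address space, map and unmap it once so the page tables are
 * allocated, and cache the area, address and kernel PTE in
 * cpu_patching_context.
 */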
static int text_area_cpu_up(unsigned int cpu)
{
	struct vm_struct *area;
	unsigned long addr;
	int err;

	area = get_vm_area(PAGE_SIZE, 0);
	if (!area) {
		WARN_ONCE(1, "Failed to create text area for cpu %d\n", cpu);
		return -1;
	}

	// Map/unmap the area to ensure all page tables are pre-allocated
	addr = (unsigned long)area->addr;
	err = map_patch_area(empty_zero_page, addr);
	if (err)
		return err;

	unmap_patch_area(addr);

	this_cpu_write(cpu_patching_context.area, area);
	this_cpu_write(cpu_patching_context.addr, addr);
	this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));

	return 0;
}

static int text_area_cpu_down(unsigned int cpu)
{
	free_vm_area(this_cpu_read(cpu_patching_context.area));
	this_cpu_write(cpu_patching_context.area, NULL);
	this_cpu_write(cpu_patching_context.addr, 0);
	this_cpu_write(cpu_patching_context.pte, NULL);
	return 0;
}
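
/*
 * Temporary-mm scheme: give each CPU a private mm with one page reserved
 * at a randomised address to serve as the patching window.
 */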
static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
{
	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm);
	free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
	mmput(mm);
}

static int text_area_cpu_up_mm(unsigned int cpu)
{
	struct mm_struct *mm;
	unsigned long addr;
	pte_t *pte;
	spinlock_t *ptl;

	mm = mm_alloc();
	if (WARN_ON(!mm))
		goto fail_no_mm;

	/*
	 * Choose a random page-aligned address from the interval
	 * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
	 * The lower address bound is PAGE_SIZE to avoid the zero-page.
	 */
	addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;

	/*
	 * PTE allocation uses GFP_KERNEL, which means we need to
	 * pre-allocate the PTE here because we cannot do the
	 * allocation during patching when IRQs are disabled.
	 *
	 * Using get_locked_pte() to avoid open coding; the lock
	 * is unnecessary.
	 */
	pte = get_locked_pte(mm, addr, &ptl);
	if (!pte)
		goto fail_no_pte;
	pte_unmap_unlock(pte, ptl);

	this_cpu_write(cpu_patching_context.mm, mm);
	this_cpu_write(cpu_patching_context.addr, addr);

	return 0;

fail_no_pte:
	put_patching_mm(mm, addr);
fail_no_mm:
	return -ENOMEM;
}

static int text_area_cpu_down_mm(unsigned int cpu)
{
	put_patching_mm(this_cpu_read(cpu_patching_context.mm),
			this_cpu_read(cpu_patching_context.addr));

	this_cpu_write(cpu_patching_context.mm, NULL);
	this_cpu_write(cpu_patching_context.addr, 0);

	return 0;
}
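
/*
 * Once poking_init() has registered the hotplug callbacks and enabled
 * this static key, patch_mem() stops writing directly and starts going
 * through the per-CPU patching context.
 */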
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);

void __init poking_init(void)
{
	int ret;

	if (mm_patch_enabled())
		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					"powerpc/text_poke_mm:online",
					text_area_cpu_up_mm,
					text_area_cpu_down_mm);
	else
		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					"powerpc/text_poke:online",
					text_area_cpu_up,
					text_area_cpu_down);

	/* cpuhp_setup_state returns >= 0 on success */
	if (WARN_ON(ret < 0))
		return;

	static_branch_enable(&poking_init_done);
}
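
/* Resolve the page frame backing 'addr', which may be in vmalloc/module space. */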
static unsigned long get_patch_pfn(void *addr)
{
	if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
		return vmalloc_to_pfn(addr);
	else
		return __pa_symbol(addr) >> PAGE_SHIFT;
}

/*
 * This can be called for kernel text or a module.
 */
static int map_patch_area(void *addr, unsigned long text_poke_addr)
{
	unsigned long pfn = get_patch_pfn(addr);

	return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
}

static void unmap_patch_area(unsigned long addr)
{
	pte_t *ptep;
	pmd_t *pmdp;
	pud_t *pudp;
	p4d_t *p4dp;
	pgd_t *pgdp;

	pgdp = pgd_offset_k(addr);
	if (WARN_ON(pgd_none(*pgdp)))
		return;

	p4dp = p4d_offset(pgdp, addr);
	if (WARN_ON(p4d_none(*p4dp)))
		return;

	pudp = pud_offset(p4dp, addr);
	if (WARN_ON(pud_none(*pudp)))
		return;

	pmdp = pmd_offset(pudp, addr);
	if (WARN_ON(pmd_none(*pmdp)))
		return;

	ptep = pte_offset_kernel(pmdp, addr);
	if (WARN_ON(pte_none(*ptep)))
		return;

	/*
	 * In hash, pte_clear flushes the TLB; in radix, we have to do it
	 * ourselves.
	 */
	pte_clear(&init_mm, addr, ptep);
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}
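
/*
 * Patch one word via the temporary mm: map the target page at the poke
 * address, switch to the patching mm, write through the alias, then
 * clear the PTE and flush the local TLB. Called with IRQs disabled.
 */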
static int __do_patch_mem_mm(void *addr, unsigned long val, bool is_dword)
{
	int err;
	u32 *patch_addr;
	unsigned long text_poke_addr;
	pte_t *pte;
	unsigned long pfn = get_patch_pfn(addr);
	struct mm_struct *patching_mm;
	struct mm_struct *orig_mm;
	spinlock_t *ptl;

	patching_mm = __this_cpu_read(cpu_patching_context.mm);
	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
	if (!pte)
		return -ENOMEM;

	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);

	/* order PTE update before use, also serves as the hwsync */
	asm volatile("ptesync" ::: "memory");

	/* order context switch after arbitrary prior code */
	isync();

	orig_mm = start_using_temp_mm(patching_mm);

	err = __patch_mem(addr, val, patch_addr, is_dword);

	/* context synchronisation performed by __patch_mem() (isync or exception) */
	stop_using_temp_mm(patching_mm, orig_mm);

	pte_clear(patching_mm, text_poke_addr, pte);
	/*
	 * ptesync to order PTE update before TLB invalidation done
	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
	 */
	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);

	pte_unmap_unlock(pte, ptl);

	return err;
}
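
/*
 * Non-mm variant: install the target page into the per-CPU vmalloc area
 * using the pre-allocated kernel PTE, write through the alias, then
 * clear the PTE and flush. Called with IRQs disabled.
 */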
static int __do_patch_mem(void *addr, unsigned long val, bool is_dword)
{
	int err;
	u32 *patch_addr;
	unsigned long text_poke_addr;
	pte_t *pte;
	unsigned long pfn = get_patch_pfn(addr);

	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	pte = __this_cpu_read(cpu_patching_context.pte);
	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
	/* See ptesync comment in radix__set_pte_at() */
	if (radix_enabled())
		asm volatile("ptesync" ::: "memory");

	err = __patch_mem(addr, val, patch_addr, is_dword);

	pte_clear(&init_mm, text_poke_addr, pte);
	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);

	return err;
}
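
/*
 * Common entry point for single-word patching; falls back to a direct
 * write before poking_init() has run or when CONFIG_STRICT_KERNEL_RWX
 * is disabled.
 */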
static int patch_mem(void *addr, unsigned long val, bool is_dword)
{
	int err;
	unsigned long flags;

	/*
	 * During early boot patch_instruction() is called when the
	 * text_poke area is not yet ready, but we still need to allow
	 * patching; just do a plain direct write in that case.
	 */
	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
	    !static_branch_likely(&poking_init_done))
		return __patch_mem(addr, val, addr, is_dword);

	local_irq_save(flags);
	if (mm_patch_enabled())
		err = __do_patch_mem_mm(addr, val, is_dword);
	else
		err = __do_patch_mem(addr, val, is_dword);
	local_irq_restore(flags);

	return err;
}
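
/*
 * Usage sketch for patch_instruction() below (illustrative only, not part
 * of the kernel source; assumes PPC_RAW_NOP() from asm/ppc-opcode.h):
 * overwrite a kernel instruction with a nop, as ftrace-style code might:
 *
 *	ppc_inst_t nop = ppc_inst(PPC_RAW_NOP());
 *
 *	if (patch_instruction(ip, nop))
 *		pr_err("nop patch failed at %px\n", ip);
 */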
#ifdef CONFIG_PPC64

int patch_instruction(u32 *addr, ppc_inst_t instr)
{
	if (ppc_inst_prefixed(instr))
		return patch_mem(addr, ppc_inst_as_ulong(instr), true);
	else
		return patch_mem(addr, ppc_inst_val(instr), false);
}
NOKPROBE_SYMBOL(patch_instruction);

int patch_uint(void *addr, unsigned int val)
{
	if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int)))
		return -EINVAL;

	return patch_mem(addr, val, false);
}
NOKPROBE_SYMBOL(patch_uint);

int patch_ulong(void *addr, unsigned long val)
{
	if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long)))
		return -EINVAL;

	return patch_mem(addr, val, true);
}
NOKPROBE_SYMBOL(patch_ulong);

#else

int patch_instruction(u32 *addr, ppc_inst_t instr)
{
	return patch_mem(addr, ppc_inst_val(instr), false);
}
NOKPROBE_SYMBOL(patch_instruction);

#endif
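
/*
 * Fault-safe fill helpers for the repeat-instruction case: store the
 * same value 'count' times, returning -EPERM if a store faults.
 */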
static int patch_memset64(u64 *addr, u64 val, size_t count)
{
	for (u64 *end = addr + count; addr < end; addr++)
		__put_kernel_nofault(addr, &val, u64, failed);

	return 0;

failed:
	return -EPERM;
}

static int patch_memset32(u32 *addr, u32 val, size_t count)
{
	for (u32 *end = addr + count; addr < end; addr++)
		__put_kernel_nofault(addr, &val, u32, failed);

	return 0;

failed:
	return -EPERM;
}

static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr)
{
	unsigned long start = (unsigned long)patch_addr;
	int err;

	/* Repeat instruction */
	if (repeat_instr) {
		ppc_inst_t instr = ppc_inst_read(code);

		if (ppc_inst_prefixed(instr)) {
			u64 val = ppc_inst_as_ulong(instr);

			err = patch_memset64((u64 *)patch_addr, val, len / 8);
		} else {
			u32 val = ppc_inst_val(instr);

			err = patch_memset32(patch_addr, val, len / 4);
		}
	} else {
		err = copy_to_kernel_nofault(patch_addr, code, len);
	}

	smp_wmb(); /* smp write barrier */
	flush_icache_range(start, start + len);
	return err;
}

/*
 * A page is mapped and instructions that fit the page are patched.
 * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
 */
static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
	struct mm_struct *patching_mm, *orig_mm;
	unsigned long pfn = get_patch_pfn(addr);
	unsigned long text_poke_addr;
	spinlock_t *ptl;
	u32 *patch_addr;
	pte_t *pte;
	int err;

	patching_mm = __this_cpu_read(cpu_patching_context.mm);
	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
	if (!pte)
		return -ENOMEM;

	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);

	/* order PTE update before use, also serves as the hwsync */
	asm volatile("ptesync" ::: "memory");

	/* order context switch after arbitrary prior code */
	isync();

	orig_mm = start_using_temp_mm(patching_mm);

	kasan_disable_current();
	err = __patch_instructions(patch_addr, code, len, repeat_instr);
	kasan_enable_current();

	/* context synchronisation performed by __patch_instructions */
	stop_using_temp_mm(patching_mm, orig_mm);

	pte_clear(patching_mm, text_poke_addr, pte);
	/*
	 * ptesync to order PTE update before TLB invalidation done
	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
	 */
	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);

	pte_unmap_unlock(pte, ptl);

	return err;
}

/*
 * A page is mapped and instructions that fit the page are patched.
 * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
 */
static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
	unsigned long pfn = get_patch_pfn(addr);
	unsigned long text_poke_addr;
	u32 *patch_addr;
	pte_t *pte;
	int err;

	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	pte = __this_cpu_read(cpu_patching_context.pte);
	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
	/* See ptesync comment in radix__set_pte_at() */
	if (radix_enabled())
		asm volatile("ptesync" ::: "memory");

	err = __patch_instructions(patch_addr, code, len, repeat_instr);

	pte_clear(&init_mm, text_poke_addr, pte);
	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);

	return err;
}

/*
 * Patch 'addr' with 'len' bytes of instructions from 'code'.
 *
 * If repeat_instr is true, the same instruction is filled for
 * 'len' bytes.
 */
int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
	while (len > 0) {
		unsigned long flags;
		size_t plen;
		int err;

		plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);

		local_irq_save(flags);
		if (mm_patch_enabled())
			err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
		else
			err = __do_patch_instructions(addr, code, plen, repeat_instr);
		local_irq_restore(flags);
		if (err)
			return err;

		len -= plen;
		addr = (u32 *)((unsigned long)addr + plen);
		if (!repeat_instr)
			code = (u32 *)((unsigned long)code + plen);
	}

	return 0;
}
NOKPROBE_SYMBOL(patch_instructions);
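
/*
 * Emit an unconditional branch at 'addr' to 'target', failing with
 * -ERANGE when create_branch() cannot encode the displacement.
 */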
int patch_branch(u32 *addr, unsigned long target, int flags)
{
	ppc_inst_t instr;

	if (create_branch(&instr, addr, target, flags))
		return -ERANGE;

	return patch_instruction(addr, instr);
}

/*
 * Helper to check if a given instruction is a conditional branch.
 * Derived from the conditional checks in analyse_instr().
 */
bool is_conditional_branch(ppc_inst_t instr)
{
	unsigned int opcode = ppc_inst_primary_opcode(instr);

	if (opcode == 16)	/* bc, bca, bcl, bcla */
		return true;
	if (opcode == 19) {
		switch ((ppc_inst_val(instr) >> 1) & 0x3ff) {
		case 16:	/* bclr, bclrl */
		case 528:	/* bcctr, bcctrl */
		case 560:	/* bctar, bctarl */
			return true;
		}
	}
	return false;
}
NOKPROBE_SYMBOL(is_conditional_branch);
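
/*
 * B-form encoding: primary opcode 16 (0x40000000); the BO/BI fields plus
 * the AA/LK bits come in via 'flags' (masked with 0x3FF0003), and the
 * 14-bit word displacement lands in bits 2-15 (masked with 0xFFFC).
 */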
int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
		       unsigned long target, int flags)
{
	long offset;

	offset = target;
	if (!(flags & BRANCH_ABSOLUTE))
		offset = offset - (unsigned long)addr;

	/* Check we can represent the target in the instruction format */
	if (!is_offset_in_cond_branch_range(offset))
		return 1;

	/* Mask out the flags and target, so they don't step on each other. */
	*instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC));

	return 0;
}

int instr_is_relative_branch(ppc_inst_t instr)
{
	if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
		return 0;

	return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}

int instr_is_relative_link_branch(ppc_inst_t instr)
{
	return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
}
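
/*
 * Branch target decoding: the I-form LI field lives in bits 2-25 (mask
 * 0x3FFFFFC) and the B-form BD field in bits 2-15 (mask 0xFFFC); both are
 * sign-extended and, for relative branches, added to the address of the
 * instruction itself.
 */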
static unsigned long branch_iform_target(const u32 *instr)
{
	signed long imm;

	imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;

	/* If the top bit of the immediate value is set this is negative */
	if (imm & 0x2000000)
		imm -= 0x4000000;

	if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
		imm += (unsigned long)instr;

	return (unsigned long)imm;
}

static unsigned long branch_bform_target(const u32 *instr)
{
	signed long imm;

	imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;

	/* If the top bit of the immediate value is set this is negative */
	if (imm & 0x8000)
		imm -= 0x10000;

	if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
		imm += (unsigned long)instr;

	return (unsigned long)imm;
}

unsigned long branch_target(const u32 *instr)
{
	if (instr_is_branch_iform(ppc_inst_read(instr)))
		return branch_iform_target(instr);
	else if (instr_is_branch_bform(ppc_inst_read(instr)))
		return branch_bform_target(instr);

	return 0;
}

int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
{
	unsigned long target;

	target = branch_target(src);

	if (instr_is_branch_iform(ppc_inst_read(src)))
		return create_branch(instr, dest, target,
				     ppc_inst_val(ppc_inst_read(src)));
	else if (instr_is_branch_bform(ppc_inst_read(src)))
		return create_cond_branch(instr, dest, target,
					  ppc_inst_val(ppc_inst_read(src)));

	return 1;
}