Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/mm/extable.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
#include <linux/extable.h>
3
#include <linux/uaccess.h>
4
#include <linux/sched/debug.h>
5
#include <linux/bitfield.h>
6
#include <xen/xen.h>
7
8
#include <asm/fpu/api.h>
9
#include <asm/fred.h>
10
#include <asm/sev.h>
11
#include <asm/traps.h>
12
#include <asm/kdebug.h>
13
#include <asm/insn-eval.h>
14
#include <asm/sgx.h>
15
16
/*
 * Return a pointer to the slot inside @regs that belongs to register
 * number @nr.
 *
 * The byte offset of the slot comes from pt_regs_offset().  A negative
 * offset triggers a one-time warning and the caller is handed a static
 * scratch word instead, so the returned pointer can always be written
 * through.
 */
static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr)
{
	static unsigned long scratch_slot;
	int offset = pt_regs_offset(regs, nr);

	if (!WARN_ON_ONCE(offset < 0))
		return (unsigned long *)((unsigned long)regs + offset);

	return &scratch_slot;
}
26
27
static inline unsigned long
28
ex_fixup_addr(const struct exception_table_entry *x)
29
{
30
return (unsigned long)&x->fixup + x->fixup;
31
}
32
33
static bool ex_handler_default(const struct exception_table_entry *e,
34
struct pt_regs *regs)
35
{
36
if (e->data & EX_FLAG_CLEAR_AX)
37
regs->ax = 0;
38
if (e->data & EX_FLAG_CLEAR_DX)
39
regs->dx = 0;
40
41
regs->ip = ex_fixup_addr(e);
42
return true;
43
}
44
45
/*
46
* This is the *very* rare case where we do a "load_unaligned_zeropad()"
47
* and it's a page crosser into a non-existent page.
48
*
49
* This happens when we optimistically load a pathname a word-at-a-time
50
* and the name is less than the full word and the next page is not
51
* mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC.
52
*
53
* NOTE! The faulting address is always a 'mov mem,reg' type instruction
54
* of size 'long', and the exception fixup must always point to right
55
* after the instruction.
56
*/
57
static bool ex_handler_zeropad(const struct exception_table_entry *e,
58
struct pt_regs *regs,
59
unsigned long fault_addr)
60
{
61
struct insn insn;
62
const unsigned long mask = sizeof(long) - 1;
63
unsigned long offset, addr, next_ip, len;
64
unsigned long *reg;
65
66
next_ip = ex_fixup_addr(e);
67
len = next_ip - regs->ip;
68
if (len > MAX_INSN_SIZE)
69
return false;
70
71
if (insn_decode(&insn, (void *) regs->ip, len, INSN_MODE_KERN))
72
return false;
73
if (insn.length != len)
74
return false;
75
76
if (insn.opcode.bytes[0] != 0x8b)
77
return false;
78
if (insn.opnd_bytes != sizeof(long))
79
return false;
80
81
addr = (unsigned long) insn_get_addr_ref(&insn, regs);
82
if (addr == ~0ul)
83
return false;
84
85
offset = addr & mask;
86
addr = addr & ~mask;
87
if (fault_addr != addr + sizeof(long))
88
return false;
89
90
reg = insn_get_modrm_reg_ptr(&insn, regs);
91
if (!reg)
92
return false;
93
94
*reg = *(unsigned long *)addr >> (offset * 8);
95
return ex_handler_default(e, regs);
96
}
97
98
static bool ex_handler_fault(const struct exception_table_entry *fixup,
99
struct pt_regs *regs, int trapnr)
100
{
101
regs->ax = trapnr;
102
return ex_handler_default(fixup, regs);
103
}
104
105
static bool ex_handler_sgx(const struct exception_table_entry *fixup,
106
struct pt_regs *regs, int trapnr)
107
{
108
regs->ax = trapnr | SGX_ENCLS_FAULT_FLAG;
109
return ex_handler_default(fixup, regs);
110
}
111
112
/*
 * Fixup for a failed restore of a task's FPU state.
 *
 * This should never run, because the FPU state of a task using the FPU
 * (struct fpu::fpstate) should always be valid.  Past bugs, however,
 * let userspace set reserved bits in the XSAVE area through
 * PTRACE_SETREGSET or sys_rt_sigreturn().  XRSTOR then failed when
 * switching to the task, which leaked the FPU registers of the task
 * that had been running on the CPU before.
 *
 * Mitigate that whole class of vulnerability: if the task's FPU state
 * cannot be restored, restore from the initial state instead
 * (essentially zeroing all FPU registers).
 */
static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
				 struct pt_regs *regs)
{
	void *fault_ip = (void *)instruction_pointer(regs);

	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
		  fault_ip);

	fpu_reset_from_exception_fixup();

	return ex_handler_default(fixup, regs);
}
132
133
/*
 * Decide whether a #GP fault address is one that a user access may
 * legitimately produce.
 *
 * On x86-64 'access_ok()' is deliberately imprecise: non-canonical user
 * addresses are let through to keep the range comparisons simple and to
 * avoid having to care about LAM being enabled.
 *
 * On top of that, up to one page of "slop" is tolerated at the sign
 * boundary, so that access_ok() can get away with checking only the
 * sign of the pointer in the common case of a small access size.
 *
 * Without CONFIG_X86_64 this always returns false.
 */
static bool gp_fault_address_ok(unsigned long fault_address)
{
#ifdef CONFIG_X86_64
	/*
	 * Either inside the "user space" part of the non-canonical
	 * space, or no more than one page above it.
	 */
	if (valid_user_address(fault_address) ||
	    valid_user_address(fault_address - PAGE_SIZE))
		return true;
#endif
	return false;
}
156
157
static bool ex_handler_uaccess(const struct exception_table_entry *fixup,
158
struct pt_regs *regs, int trapnr,
159
unsigned long fault_address)
160
{
161
WARN_ONCE(trapnr == X86_TRAP_GP && !gp_fault_address_ok(fault_address),
162
"General protection fault in user access. Non-canonical address?");
163
return ex_handler_default(fixup, regs);
164
}
165
166
static bool ex_handler_msr(const struct exception_table_entry *fixup,
167
struct pt_regs *regs, bool wrmsr, bool safe, int reg)
168
{
169
if (__ONCE_LITE_IF(!safe && wrmsr)) {
170
pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
171
(unsigned int)regs->cx, (unsigned int)regs->dx,
172
(unsigned int)regs->ax, regs->ip, (void *)regs->ip);
173
show_stack_regs(regs);
174
}
175
176
if (__ONCE_LITE_IF(!safe && !wrmsr)) {
177
pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
178
(unsigned int)regs->cx, regs->ip, (void *)regs->ip);
179
show_stack_regs(regs);
180
}
181
182
if (!wrmsr) {
183
/* Pretend that the read succeeded and returned 0. */
184
regs->ax = 0;
185
regs->dx = 0;
186
}
187
188
if (safe)
189
*pt_regs_nr(regs, reg) = -EIO;
190
191
return ex_handler_default(fixup, regs);
192
}
193
194
static bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
195
struct pt_regs *regs)
196
{
197
if (static_cpu_has(X86_BUG_NULL_SEG))
198
asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
199
asm volatile ("mov %0, %%fs" : : "rm" (0));
200
return ex_handler_default(fixup, regs);
201
}
202
203
/*
 * Store the immediate @imm (widened to long) into register number @reg,
 * then apply the default fixup.
 */
static bool ex_handler_imm_reg(const struct exception_table_entry *fixup,
			       struct pt_regs *regs, int reg, int imm)
{
	unsigned long *slot = pt_regs_nr(regs, reg);

	*slot = (long)imm;

	return ex_handler_default(fixup, regs);
}
209
210
static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup,
211
struct pt_regs *regs, int trapnr,
212
unsigned long fault_address,
213
int reg, int imm)
214
{
215
regs->cx = imm * regs->cx + *pt_regs_nr(regs, reg);
216
return ex_handler_uaccess(fixup, regs, trapnr, fault_address);
217
}
218
219
#ifdef CONFIG_X86_FRED
220
static bool ex_handler_eretu(const struct exception_table_entry *fixup,
221
struct pt_regs *regs, unsigned long error_code)
222
{
223
struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, orig_ax));
224
unsigned short ss = uregs->ss;
225
unsigned short cs = uregs->cs;
226
227
/*
228
* Move the NMI bit from the invalid stack frame, which caused ERETU
229
* to fault, to the fault handler's stack frame, thus to unblock NMI
230
* with the fault handler's ERETS instruction ASAP if NMI is blocked.
231
*/
232
regs->fred_ss.nmi = uregs->fred_ss.nmi;
233
234
/*
235
* Sync event information to uregs, i.e., the ERETU return frame, but
236
* is it safe to write to the ERETU return frame which is just above
237
* current event stack frame?
238
*
239
* The RSP used by FRED to push a stack frame is not the value in %rsp,
240
* it is calculated from %rsp with the following 2 steps:
241
* 1) RSP = %rsp - (IA32_FRED_CONFIG & 0x1c0) // Reserve N*64 bytes
242
* 2) RSP = RSP & ~0x3f // Align to a 64-byte cache line
243
* when an event delivery doesn't trigger a stack level change.
244
*
245
* Here is an example with N*64 (N=1) bytes reserved:
246
*
247
* 64-byte cache line ==> ______________
248
* |___Reserved___|
249
* |__Event_data__|
250
* |_____SS_______|
251
* |_____RSP______|
252
* |_____FLAGS____|
253
* |_____CS_______|
254
* |_____IP_______|
255
* 64-byte cache line ==> |__Error_code__| <== ERETU return frame
256
* |______________|
257
* |______________|
258
* |______________|
259
* |______________|
260
* |______________|
261
* |______________|
262
* |______________|
263
* 64-byte cache line ==> |______________| <== RSP after step 1) and 2)
264
* |___Reserved___|
265
* |__Event_data__|
266
* |_____SS_______|
267
* |_____RSP______|
268
* |_____FLAGS____|
269
* |_____CS_______|
270
* |_____IP_______|
271
* 64-byte cache line ==> |__Error_code__| <== ERETS return frame
272
*
273
* Thus a new FRED stack frame will always be pushed below a previous
274
* FRED stack frame ((N*64) bytes may be reserved between), and it is
275
* safe to write to a previous FRED stack frame as they never overlap.
276
*/
277
fred_info(uregs)->edata = fred_event_data(regs);
278
uregs->ssx = regs->ssx;
279
uregs->fred_ss.ss = ss;
280
/* The NMI bit was moved away above */
281
uregs->fred_ss.nmi = 0;
282
uregs->csx = regs->csx;
283
uregs->fred_cs.sl = 0;
284
uregs->fred_cs.wfe = 0;
285
uregs->cs = cs;
286
uregs->orig_ax = error_code;
287
288
return ex_handler_default(fixup, regs);
289
}
290
#endif
291
292
int ex_get_fixup_type(unsigned long ip)
293
{
294
const struct exception_table_entry *e = search_exception_tables(ip);
295
296
return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE;
297
}
298
299
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
300
unsigned long fault_addr)
301
{
302
const struct exception_table_entry *e;
303
int type, reg, imm;
304
305
#ifdef CONFIG_PNPBIOS
306
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
307
extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
308
extern u32 pnp_bios_is_utter_crap;
309
pnp_bios_is_utter_crap = 1;
310
printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
311
__asm__ volatile(
312
"movl %0, %%esp\n\t"
313
"jmp *%1\n\t"
314
: : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
315
panic("do_trap: can't hit this");
316
}
317
#endif
318
319
e = search_exception_tables(regs->ip);
320
if (!e)
321
return 0;
322
323
type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
324
reg = FIELD_GET(EX_DATA_REG_MASK, e->data);
325
imm = FIELD_GET(EX_DATA_IMM_MASK, e->data);
326
327
switch (type) {
328
case EX_TYPE_DEFAULT:
329
case EX_TYPE_DEFAULT_MCE_SAFE:
330
return ex_handler_default(e, regs);
331
case EX_TYPE_FAULT:
332
case EX_TYPE_FAULT_MCE_SAFE:
333
return ex_handler_fault(e, regs, trapnr);
334
case EX_TYPE_UACCESS:
335
return ex_handler_uaccess(e, regs, trapnr, fault_addr);
336
case EX_TYPE_CLEAR_FS:
337
return ex_handler_clear_fs(e, regs);
338
case EX_TYPE_FPU_RESTORE:
339
return ex_handler_fprestore(e, regs);
340
case EX_TYPE_BPF:
341
return ex_handler_bpf(e, regs);
342
case EX_TYPE_WRMSR:
343
return ex_handler_msr(e, regs, true, false, reg);
344
case EX_TYPE_RDMSR:
345
return ex_handler_msr(e, regs, false, false, reg);
346
case EX_TYPE_WRMSR_SAFE:
347
return ex_handler_msr(e, regs, true, true, reg);
348
case EX_TYPE_RDMSR_SAFE:
349
return ex_handler_msr(e, regs, false, true, reg);
350
case EX_TYPE_WRMSR_IN_MCE:
351
ex_handler_msr_mce(regs, true);
352
break;
353
case EX_TYPE_RDMSR_IN_MCE:
354
ex_handler_msr_mce(regs, false);
355
break;
356
case EX_TYPE_POP_REG:
357
regs->sp += sizeof(long);
358
fallthrough;
359
case EX_TYPE_IMM_REG:
360
return ex_handler_imm_reg(e, regs, reg, imm);
361
case EX_TYPE_FAULT_SGX:
362
return ex_handler_sgx(e, regs, trapnr);
363
case EX_TYPE_UCOPY_LEN:
364
return ex_handler_ucopy_len(e, regs, trapnr, fault_addr, reg, imm);
365
case EX_TYPE_ZEROPAD:
366
return ex_handler_zeropad(e, regs, fault_addr);
367
#ifdef CONFIG_X86_FRED
368
case EX_TYPE_ERETU:
369
return ex_handler_eretu(e, regs, error_code);
370
#endif
371
}
372
BUG();
373
}
374
375
extern unsigned int early_recursion_flag;
376
377
/* Restricted version used during very early boot */
378
void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
379
{
380
/* Ignore early NMIs. */
381
if (trapnr == X86_TRAP_NMI)
382
return;
383
384
if (early_recursion_flag > 2)
385
goto halt_loop;
386
387
/*
388
* Old CPUs leave the high bits of CS on the stack
389
* undefined. I'm not sure which CPUs do this, but at least
390
* the 486 DX works this way.
391
* Xen pv domains are not using the default __KERNEL_CS.
392
*/
393
if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
394
goto fail;
395
396
/*
397
* The full exception fixup machinery is available as soon as
398
* the early IDT is loaded. This means that it is the
399
* responsibility of extable users to either function correctly
400
* when handlers are invoked early or to simply avoid causing
401
* exceptions before they're ready to handle them.
402
*
403
* This is better than filtering which handlers can be used,
404
* because refusing to call a handler here is guaranteed to
405
* result in a hard-to-debug panic.
406
*
407
* Keep in mind that not all vectors actually get here. Early
408
* page faults, for example, are special.
409
*/
410
if (fixup_exception(regs, trapnr, regs->orig_ax, 0))
411
return;
412
413
if (trapnr == X86_TRAP_UD) {
414
if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
415
/* Skip the ud2. */
416
regs->ip += LEN_UD2;
417
return;
418
}
419
420
/*
421
* If this was a BUG and report_bug returns or if this
422
* was just a normal #UD, we want to continue onward and
423
* crash.
424
*/
425
}
426
427
fail:
428
early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
429
(unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
430
regs->orig_ax, read_cr2());
431
432
show_regs(regs);
433
434
halt_loop:
435
while (true)
436
halt();
437
}
438
439