GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kvm/emulate.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/******************************************************************************
3
* emulate.c
4
*
5
* Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6
*
7
* Copyright (c) 2005 Keir Fraser
8
*
9
* Linux coding style, mod r/m decoder, segment base fixes, real-mode
10
* privileged instructions:
11
*
12
* Copyright (C) 2006 Qumranet
13
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
14
*
15
* Avi Kivity <[email protected]>
16
* Yaniv Kamay <[email protected]>
17
*
18
* From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
19
*/
20
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22
#include <linux/kvm_host.h>
23
#include "kvm_cache_regs.h"
24
#include "kvm_emulate.h"
25
#include <linux/stringify.h>
26
#include <asm/debugreg.h>
27
#include <asm/nospec-branch.h>
28
#include <asm/ibt.h>
29
#include <asm/text-patching.h>
30
31
#include "x86.h"
32
#include "tss.h"
33
#include "mmu.h"
34
#include "pmu.h"
35
36
/*
37
* Operand types
38
*/
39
#define OpNone 0ull
40
#define OpImplicit 1ull /* No generic decode */
41
#define OpReg 2ull /* Register */
42
#define OpMem 3ull /* Memory */
43
#define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
44
#define OpDI 5ull /* ES:DI/EDI/RDI */
45
#define OpMem64 6ull /* Memory, 64-bit */
46
#define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
47
#define OpDX 8ull /* DX register */
48
#define OpCL 9ull /* CL register (for shifts) */
49
#define OpImmByte 10ull /* 8-bit sign extended immediate */
50
#define OpOne 11ull /* Implied 1 */
51
#define OpImm 12ull /* Sign extended up to 32-bit immediate */
52
#define OpMem16 13ull /* Memory operand (16-bit). */
53
#define OpMem32 14ull /* Memory operand (32-bit). */
54
#define OpImmU 15ull /* Immediate operand, zero extended */
55
#define OpSI 16ull /* SI/ESI/RSI */
56
#define OpImmFAddr 17ull /* Immediate far address */
57
#define OpMemFAddr 18ull /* Far address in memory */
58
#define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
59
#define OpES 20ull /* ES */
60
#define OpCS 21ull /* CS */
61
#define OpSS 22ull /* SS */
62
#define OpDS 23ull /* DS */
63
#define OpFS 24ull /* FS */
64
#define OpGS 25ull /* GS */
65
#define OpMem8 26ull /* 8-bit zero extended memory operand */
66
#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
67
#define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
68
#define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
69
#define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
70
71
#define OpBits 5 /* Width of operand field */
72
#define OpMask ((1ull << OpBits) - 1)
73
74
/*
75
* Opcode effective-address decode tables.
76
* Note that we only emulate instructions that have at least one memory
77
* operand (excluding implicit stack references). We assume that stack
78
* references and instruction fetches will never occur in special memory
79
* areas that require emulation. So, for example, 'mov <imm>,<reg>' need
80
* not be handled.
81
*/
82
83
/* Operand sizes: 8-bit operands or specified/overridden size. */
84
#define ByteOp (1<<0) /* 8-bit operands. */
85
#define DstShift 1 /* Destination operand type at bits 1-5 */
86
#define ImplicitOps (OpImplicit << DstShift)
87
#define DstReg (OpReg << DstShift)
88
#define DstMem (OpMem << DstShift)
89
#define DstAcc (OpAcc << DstShift)
90
#define DstDI (OpDI << DstShift)
91
#define DstMem64 (OpMem64 << DstShift)
92
#define DstMem16 (OpMem16 << DstShift)
93
#define DstImmUByte (OpImmUByte << DstShift)
94
#define DstDX (OpDX << DstShift)
95
#define DstAccLo (OpAccLo << DstShift)
96
#define DstMask (OpMask << DstShift)
97
#define SrcShift 6 /* Source operand type at bits 6-10 */
98
#define SrcNone (OpNone << SrcShift)
99
#define SrcReg (OpReg << SrcShift)
100
#define SrcMem (OpMem << SrcShift)
101
#define SrcMem16 (OpMem16 << SrcShift)
102
#define SrcMem32 (OpMem32 << SrcShift)
103
#define SrcImm (OpImm << SrcShift)
104
#define SrcImmByte (OpImmByte << SrcShift)
105
#define SrcOne (OpOne << SrcShift)
106
#define SrcImmUByte (OpImmUByte << SrcShift)
107
#define SrcImmU (OpImmU << SrcShift)
108
#define SrcSI (OpSI << SrcShift)
109
#define SrcXLat (OpXLat << SrcShift)
110
#define SrcImmFAddr (OpImmFAddr << SrcShift)
111
#define SrcMemFAddr (OpMemFAddr << SrcShift)
112
#define SrcAcc (OpAcc << SrcShift)
113
#define SrcImmU16 (OpImmU16 << SrcShift)
114
#define SrcImm64 (OpImm64 << SrcShift)
115
#define SrcDX (OpDX << SrcShift)
116
#define SrcMem8 (OpMem8 << SrcShift)
117
#define SrcAccHi (OpAccHi << SrcShift)
118
#define SrcMask (OpMask << SrcShift)
119
#define BitOp (1<<11)
120
#define MemAbs (1<<12) /* Memory operand is absolute displacement */
121
#define String (1<<13) /* String instruction (rep capable) */
122
#define Stack (1<<14) /* Stack instruction (push/pop) */
123
#define GroupMask (7<<15) /* Group mechanisms, at bits 15-17 */
124
#define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
125
#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
126
#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
127
#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
128
#define Escape (5<<15) /* Escape to coprocessor instruction */
129
#define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
130
#define ModeDual (7<<15) /* Different instruction for 32/64 bit */
131
#define Sse (1<<18) /* SSE Vector instruction */
132
#define ModRM (1<<19) /* Generic ModRM decode. */
133
#define Mov (1<<20) /* Destination is only written; never read. */
134
#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
135
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
136
#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
137
#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
138
#define Undefined (1<<25) /* No Such Instruction */
139
#define Lock (1<<26) /* lock prefix is allowed for the instruction */
140
#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
141
#define No64 (1<<28) /* Instruction generates #UD in 64-bit mode */
142
#define PageTable (1 << 29) /* instruction used to write page table */
143
#define NotImpl (1 << 30) /* instruction is not implemented */
144
#define Avx ((u64)1 << 31) /* Instruction uses VEX prefix */
145
#define Src2Shift (32) /* Source 2 operand type at bits 32-36 */
146
#define Src2None (OpNone << Src2Shift)
147
#define Src2Mem (OpMem << Src2Shift)
148
#define Src2CL (OpCL << Src2Shift)
149
#define Src2ImmByte (OpImmByte << Src2Shift)
150
#define Src2One (OpOne << Src2Shift)
151
#define Src2Imm (OpImm << Src2Shift)
152
#define Src2ES (OpES << Src2Shift)
153
#define Src2CS (OpCS << Src2Shift)
154
#define Src2SS (OpSS << Src2Shift)
155
#define Src2DS (OpDS << Src2Shift)
156
#define Src2FS (OpFS << Src2Shift)
157
#define Src2GS (OpGS << Src2Shift)
158
#define Src2Mask (OpMask << Src2Shift)
159
/* free: 37-39 */
160
#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
161
#define AlignMask ((u64)3 << 41) /* Memory alignment requirement at bits 41-42 */
162
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
163
#define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
164
#define Aligned16 ((u64)3 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
165
/* free: 43-44 */
166
#define NoWrite ((u64)1 << 45) /* No writeback */
167
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
168
#define NoMod ((u64)1 << 47) /* Mod field is ignored */
169
#define Intercept ((u64)1 << 48) /* Has valid intercept field */
170
#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
171
#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
172
#define NearBranch ((u64)1 << 52) /* Near branches */
173
#define No16 ((u64)1 << 53) /* No 16 bit operand */
174
#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
175
#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */
176
#define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
177
#define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */
178
179
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
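/*
 * For illustration (a made-up entry, not one of the real tables in this
 * file): a descriptor such as
 *
 *	DstReg | SrcMem | ModRM | Mov
 *
 * packs OpReg into the destination field (bits 1-5), OpMem into the source
 * field (bits 6-10) and sets the ModRM and Mov flags, i.e. "ModRM-encoded,
 * register destination, memory source, destination only written".  DstXacc
 * above bundles the widened accumulator pair (e.g. DX:AX) with SrcWrite so
 * that the high half is written back as well.
 */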
180
181
#define X2(x...) x, x
182
#define X3(x...) X2(x), x
183
#define X4(x...) X2(x), X2(x)
184
#define X5(x...) X4(x), x
185
#define X6(x...) X4(x), X2(x)
186
#define X7(x...) X4(x), X3(x)
187
#define X8(x...) X4(x), X4(x)
188
#define X16(x...) X8(x), X8(x)
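/*
 * For example, X4(x) expands to "x, x, x, x", so X16(entry) emits sixteen
 * copies of the same initializer; the opcode tables later in this file use
 * these helpers to express repeated rows compactly.
 */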
189
190
struct opcode {
191
u64 flags;
192
u8 intercept;
193
u8 pad[7];
194
union {
195
int (*execute)(struct x86_emulate_ctxt *ctxt);
196
const struct opcode *group;
197
const struct group_dual *gdual;
198
const struct gprefix *gprefix;
199
const struct escape *esc;
200
const struct instr_dual *idual;
201
const struct mode_dual *mdual;
202
} u;
203
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
204
};
205
206
struct group_dual {
207
struct opcode mod012[8];
208
struct opcode mod3[8];
209
};
210
211
struct gprefix {
212
struct opcode pfx_no;
213
struct opcode pfx_66;
214
struct opcode pfx_f2;
215
struct opcode pfx_f3;
216
};
217
218
struct escape {
219
struct opcode op[8];
220
struct opcode high[64];
221
};
222
223
struct instr_dual {
224
struct opcode mod012;
225
struct opcode mod3;
226
};
227
228
struct mode_dual {
229
struct opcode mode32;
230
struct opcode mode64;
231
};
232
233
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
234
235
enum x86_transfer_type {
236
X86_TRANSFER_NONE,
237
X86_TRANSFER_CALL_JMP,
238
X86_TRANSFER_RET,
239
X86_TRANSFER_TASK_SWITCH,
240
};
241
242
enum rex_bits {
243
REX_B = 1,
244
REX_X = 2,
245
REX_R = 4,
246
REX_W = 8,
247
};
248
249
static void writeback_registers(struct x86_emulate_ctxt *ctxt)
250
{
251
unsigned long dirty = ctxt->regs_dirty;
252
unsigned reg;
253
254
for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
255
ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
256
}
257
258
static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
259
{
260
ctxt->regs_dirty = 0;
261
ctxt->regs_valid = 0;
262
}
263
264
/*
265
* These EFLAGS bits are restored from saved value during emulation, and
266
* any changes are written back to the saved value after emulation.
267
*/
268
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
269
X86_EFLAGS_PF|X86_EFLAGS_CF)
270
271
#ifdef CONFIG_X86_64
272
#define ON64(x...) x
273
#else
274
#define ON64(x...)
275
#endif
276
277
#define EM_ASM_START(op) \
278
static int em_##op(struct x86_emulate_ctxt *ctxt) \
279
{ \
280
unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; \
281
int bytes = 1, ok = 1; \
282
if (!(ctxt->d & ByteOp)) \
283
bytes = ctxt->dst.bytes; \
284
switch (bytes) {
285
286
#define __EM_ASM(str) \
287
asm("push %[flags]; popf \n\t" \
288
"10: " str \
289
"pushf; pop %[flags] \n\t" \
290
"11: \n\t" \
291
: "+a" (ctxt->dst.val), \
292
"+d" (ctxt->src.val), \
293
[flags] "+D" (flags), \
294
"+S" (ok) \
295
: "c" (ctxt->src2.val))
296
297
#define __EM_ASM_1(op, dst) \
298
__EM_ASM(#op " %%" #dst " \n\t")
299
300
#define __EM_ASM_1_EX(op, dst) \
301
__EM_ASM(#op " %%" #dst " \n\t" \
302
_ASM_EXTABLE_TYPE_REG(10b, 11f, EX_TYPE_ZERO_REG, %%esi))
303
304
#define __EM_ASM_2(op, dst, src) \
305
__EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
306
307
#define __EM_ASM_3(op, dst, src, src2) \
308
__EM_ASM(#op " %%" #src2 ", %%" #src ", %%" #dst " \n\t")
309
310
#define EM_ASM_END \
311
} \
312
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); \
313
return !ok ? emulate_de(ctxt) : X86EMUL_CONTINUE; \
314
}
315
316
/* 1-operand, using "a" (dst) */
317
#define EM_ASM_1(op) \
318
EM_ASM_START(op) \
319
case 1: __EM_ASM_1(op##b, al); break; \
320
case 2: __EM_ASM_1(op##w, ax); break; \
321
case 4: __EM_ASM_1(op##l, eax); break; \
322
ON64(case 8: __EM_ASM_1(op##q, rax); break;) \
323
EM_ASM_END
324
325
/* 1-operand, using "c" (src2) */
326
#define EM_ASM_1SRC2(op, name) \
327
EM_ASM_START(name) \
328
case 1: __EM_ASM_1(op##b, cl); break; \
329
case 2: __EM_ASM_1(op##w, cx); break; \
330
case 4: __EM_ASM_1(op##l, ecx); break; \
331
ON64(case 8: __EM_ASM_1(op##q, rcx); break;) \
332
EM_ASM_END
333
334
/* 1-operand, using "c" (src2) with exception */
335
#define EM_ASM_1SRC2EX(op, name) \
336
EM_ASM_START(name) \
337
case 1: __EM_ASM_1_EX(op##b, cl); break; \
338
case 2: __EM_ASM_1_EX(op##w, cx); break; \
339
case 4: __EM_ASM_1_EX(op##l, ecx); break; \
340
ON64(case 8: __EM_ASM_1_EX(op##q, rcx); break;) \
341
EM_ASM_END
342
343
/* 2-operand, using "a" (dst), "d" (src) */
344
#define EM_ASM_2(op) \
345
EM_ASM_START(op) \
346
case 1: __EM_ASM_2(op##b, al, dl); break; \
347
case 2: __EM_ASM_2(op##w, ax, dx); break; \
348
case 4: __EM_ASM_2(op##l, eax, edx); break; \
349
ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \
350
EM_ASM_END
351
352
/* 2-operand, reversed */
353
#define EM_ASM_2R(op, name) \
354
EM_ASM_START(name) \
355
case 1: __EM_ASM_2(op##b, dl, al); break; \
356
case 2: __EM_ASM_2(op##w, dx, ax); break; \
357
case 4: __EM_ASM_2(op##l, edx, eax); break; \
358
ON64(case 8: __EM_ASM_2(op##q, rdx, rax); break;) \
359
EM_ASM_END
360
361
/* 2-operand, word only (no byte op) */
362
#define EM_ASM_2W(op) \
363
EM_ASM_START(op) \
364
case 1: break; \
365
case 2: __EM_ASM_2(op##w, ax, dx); break; \
366
case 4: __EM_ASM_2(op##l, eax, edx); break; \
367
ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \
368
EM_ASM_END
369
370
/* 2-operand, using "a" (dst) and CL (src2) */
371
#define EM_ASM_2CL(op) \
372
EM_ASM_START(op) \
373
case 1: __EM_ASM_2(op##b, al, cl); break; \
374
case 2: __EM_ASM_2(op##w, ax, cl); break; \
375
case 4: __EM_ASM_2(op##l, eax, cl); break; \
376
ON64(case 8: __EM_ASM_2(op##q, rax, cl); break;) \
377
EM_ASM_END
378
379
/* 3-operand, using "a" (dst), "d" (src) and CL (src2) */
380
#define EM_ASM_3WCL(op) \
381
EM_ASM_START(op) \
382
case 1: break; \
383
case 2: __EM_ASM_3(op##w, ax, dx, cl); break; \
384
case 4: __EM_ASM_3(op##l, eax, edx, cl); break; \
385
ON64(case 8: __EM_ASM_3(op##q, rax, rdx, cl); break;) \
386
EM_ASM_END
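/*
 * Rough sketch of what the macros above generate (not the literal
 * preprocessor output): EM_ASM_2(add) becomes
 *
 *	static int em_add(struct x86_emulate_ctxt *ctxt)
 *	{
 *		...select al/dl, ax/dx, eax/edx or rax/rdx by operand size,
 *		   load the guest's EFLAGS_MASK bits via push/popf, run the
 *		   host "add" instruction on dst.val/src.val, then capture
 *		   the resulting flags with pushf/pop...
 *	}
 *
 * so the arithmetic and flag computation are done by the host CPU itself,
 * with only the EFLAGS_MASK bits shuttled in and out of ctxt->eflags.
 */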
387
388
static int em_salc(struct x86_emulate_ctxt *ctxt)
389
{
390
/*
391
* Set AL to 0xFF if CF is set, or to 0x00 when it is clear.
392
*/
393
ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF);
394
return X86EMUL_CONTINUE;
395
}
396
397
/*
398
* XXX: inoutclob user must know where the argument is being expanded.
399
* Using asm goto would allow us to remove _fault.
400
*/
401
#define asm_safe(insn, inoutclob...) \
402
({ \
403
int _fault = 0; \
404
\
405
asm volatile("1:" insn "\n" \
406
"2:\n" \
407
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
408
: [_fault] "+r"(_fault) inoutclob ); \
409
\
410
_fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
411
})
412
413
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
414
enum x86_intercept intercept,
415
enum x86_intercept_stage stage)
416
{
417
struct x86_instruction_info info = {
418
.intercept = intercept,
419
.rep_prefix = ctxt->rep_prefix,
420
.modrm_mod = ctxt->modrm_mod,
421
.modrm_reg = ctxt->modrm_reg,
422
.modrm_rm = ctxt->modrm_rm,
423
.src_val = ctxt->src.val64,
424
.dst_val = ctxt->dst.val64,
425
.src_bytes = ctxt->src.bytes,
426
.dst_bytes = ctxt->dst.bytes,
427
.src_type = ctxt->src.type,
428
.dst_type = ctxt->dst.type,
429
.ad_bytes = ctxt->ad_bytes,
430
.rip = ctxt->eip,
431
.next_rip = ctxt->_eip,
432
};
433
434
return ctxt->ops->intercept(ctxt, &info, stage);
435
}
436
437
static void assign_masked(ulong *dest, ulong src, ulong mask)
438
{
439
*dest = (*dest & ~mask) | (src & mask);
440
}
441
442
static void assign_register(unsigned long *reg, u64 val, int bytes)
443
{
444
/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
445
switch (bytes) {
446
case 1:
447
*(u8 *)reg = (u8)val;
448
break;
449
case 2:
450
*(u16 *)reg = (u16)val;
451
break;
452
case 4:
453
*reg = (u32)val;
454
break; /* 64b: zero-extend */
455
case 8:
456
*reg = val;
457
break;
458
}
459
}
460
461
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
462
{
463
return (1UL << (ctxt->ad_bytes << 3)) - 1;
464
}
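/*
 * For example, a 16-bit address size (ad_bytes == 2) yields a mask of
 * 0xffff; address_mask() below applies it so that effective-address
 * arithmetic wraps at the address-size boundary, as it does on hardware.
 */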
465
466
static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
467
{
468
u16 sel;
469
struct desc_struct ss;
470
471
if (ctxt->mode == X86EMUL_MODE_PROT64)
472
return ~0UL;
473
ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
474
return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
475
}
476
477
static int stack_size(struct x86_emulate_ctxt *ctxt)
478
{
479
return (__fls(stack_mask(ctxt)) + 1) >> 3;
480
}
481
482
/* Access/update address held in a register, based on addressing mode. */
483
static inline unsigned long
484
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
485
{
486
if (ctxt->ad_bytes == sizeof(unsigned long))
487
return reg;
488
else
489
return reg & ad_mask(ctxt);
490
}
491
492
static inline unsigned long
493
register_address(struct x86_emulate_ctxt *ctxt, int reg)
494
{
495
return address_mask(ctxt, reg_read(ctxt, reg));
496
}
497
498
static void masked_increment(ulong *reg, ulong mask, int inc)
499
{
500
assign_masked(reg, *reg + inc, mask);
501
}
502
503
static inline void
504
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
505
{
506
ulong *preg = reg_rmw(ctxt, reg);
507
508
assign_register(preg, *preg + inc, ctxt->ad_bytes);
509
}
510
511
static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
512
{
513
masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
514
}
515
516
static u32 desc_limit_scaled(struct desc_struct *desc)
517
{
518
u32 limit = get_desc_limit(desc);
519
520
return desc->g ? (limit << 12) | 0xfff : limit;
521
}
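/*
 * For example, a descriptor with G=1 and a raw limit of 0xfffff scales to
 * 0xffffffff (4 GiB - 1), since page-granular limits cover whole 4 KiB
 * pages.
 */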
522
523
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
524
{
525
if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
526
return 0;
527
528
return ctxt->ops->get_cached_segment_base(ctxt, seg);
529
}
530
531
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
532
u32 error, bool valid)
533
{
534
if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
535
return X86EMUL_UNHANDLEABLE;
536
537
ctxt->exception.vector = vec;
538
ctxt->exception.error_code = error;
539
ctxt->exception.error_code_valid = valid;
540
return X86EMUL_PROPAGATE_FAULT;
541
}
542
543
static int emulate_db(struct x86_emulate_ctxt *ctxt)
544
{
545
return emulate_exception(ctxt, DB_VECTOR, 0, false);
546
}
547
548
static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
549
{
550
return emulate_exception(ctxt, GP_VECTOR, err, true);
551
}
552
553
static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
554
{
555
return emulate_exception(ctxt, SS_VECTOR, err, true);
556
}
557
558
static int emulate_ud(struct x86_emulate_ctxt *ctxt)
559
{
560
return emulate_exception(ctxt, UD_VECTOR, 0, false);
561
}
562
563
static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
564
{
565
return emulate_exception(ctxt, TS_VECTOR, err, true);
566
}
567
568
static int emulate_de(struct x86_emulate_ctxt *ctxt)
569
{
570
return emulate_exception(ctxt, DE_VECTOR, 0, false);
571
}
572
573
static int emulate_nm(struct x86_emulate_ctxt *ctxt)
574
{
575
return emulate_exception(ctxt, NM_VECTOR, 0, false);
576
}
577
578
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
579
{
580
u16 selector;
581
struct desc_struct desc;
582
583
ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
584
return selector;
585
}
586
587
static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
588
unsigned seg)
589
{
590
u16 dummy;
591
u32 base3;
592
struct desc_struct desc;
593
594
ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
595
ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
596
}
597
598
static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
599
{
600
return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
601
}
602
603
static inline bool emul_is_noncanonical_address(u64 la,
604
struct x86_emulate_ctxt *ctxt,
605
unsigned int flags)
606
{
607
return !ctxt->ops->is_canonical_addr(ctxt, la, flags);
608
}
609
610
/*
611
* x86 defines three classes of vector instructions: explicitly
612
* aligned, explicitly unaligned, and the rest, which change behaviour
613
* depending on whether they're AVX encoded or not.
614
*
615
* Also included is CMPXCHG16B which is not a vector instruction, yet it is
616
* subject to the same check. FXSAVE and FXRSTOR are checked here too as their
617
* 512 bytes of data must be aligned to a 16 byte boundary.
618
*/
619
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
620
{
621
u64 alignment = ctxt->d & AlignMask;
622
623
if (likely(size < 16))
624
return 1;
625
626
switch (alignment) {
627
case Unaligned:
628
return 1;
629
case Aligned16:
630
return 16;
631
case Aligned:
632
default:
633
return size;
634
}
635
}
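/*
 * For example, a 16-byte access by an instruction flagged Aligned (such as
 * MOVDQA) gets an alignment requirement of 16 here, so __linearize() below
 * injects #GP(0) if the linear address is not 16-byte aligned, whereas the
 * same access by a MOVDQU-style (Unaligned) instruction is let through.
 */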
636
637
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
638
struct segmented_address addr,
639
unsigned *max_size, unsigned size,
640
enum x86emul_mode mode, ulong *linear,
641
unsigned int flags)
642
{
643
struct desc_struct desc;
644
bool usable;
645
ulong la;
646
u32 lim;
647
u16 sel;
648
u8 va_bits;
649
650
la = seg_base(ctxt, addr.seg) + addr.ea;
651
*max_size = 0;
652
switch (mode) {
653
case X86EMUL_MODE_PROT64:
654
*linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
655
va_bits = ctxt_virt_addr_bits(ctxt);
656
if (!__is_canonical_address(la, va_bits))
657
goto bad;
658
659
*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
660
if (size > *max_size)
661
goto bad;
662
break;
663
default:
664
*linear = la = (u32)la;
665
usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
666
addr.seg);
667
if (!usable)
668
goto bad;
669
/* code segment in protected mode or read-only data segment */
670
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
671
(flags & X86EMUL_F_WRITE))
672
goto bad;
673
/* unreadable code segment */
674
if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
675
goto bad;
676
lim = desc_limit_scaled(&desc);
677
if (!(desc.type & 8) && (desc.type & 4)) {
678
/* expand-down segment */
679
if (addr.ea <= lim)
680
goto bad;
681
lim = desc.d ? 0xffffffff : 0xffff;
682
}
683
if (addr.ea > lim)
684
goto bad;
685
if (lim == 0xffffffff)
686
*max_size = ~0u;
687
else {
688
*max_size = (u64)lim + 1 - addr.ea;
689
if (size > *max_size)
690
goto bad;
691
}
692
break;
693
}
694
if (la & (insn_alignment(ctxt, size) - 1))
695
return emulate_gp(ctxt, 0);
696
return X86EMUL_CONTINUE;
697
bad:
698
if (addr.seg == VCPU_SREG_SS)
699
return emulate_ss(ctxt, 0);
700
else
701
return emulate_gp(ctxt, 0);
702
}
703
704
static int linearize(struct x86_emulate_ctxt *ctxt,
705
struct segmented_address addr,
706
unsigned size, bool write,
707
ulong *linear)
708
{
709
unsigned max_size;
710
return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
711
write ? X86EMUL_F_WRITE : 0);
712
}
713
714
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
715
{
716
ulong linear;
717
int rc;
718
unsigned max_size;
719
struct segmented_address addr = { .seg = VCPU_SREG_CS,
720
.ea = dst };
721
722
if (ctxt->op_bytes != sizeof(unsigned long))
723
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
724
rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
725
X86EMUL_F_FETCH);
726
if (rc == X86EMUL_CONTINUE)
727
ctxt->_eip = addr.ea;
728
return rc;
729
}
730
731
static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
732
{
733
u64 efer;
734
struct desc_struct cs;
735
u16 selector;
736
u32 base3;
737
738
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
739
740
if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
741
/* Real mode. The CPU must not have long mode active */
742
if (efer & EFER_LMA)
743
return X86EMUL_UNHANDLEABLE;
744
ctxt->mode = X86EMUL_MODE_REAL;
745
return X86EMUL_CONTINUE;
746
}
747
748
if (ctxt->eflags & X86_EFLAGS_VM) {
749
/* Protected/VM86 mode. The CPU must not have long mode active */
750
if (efer & EFER_LMA)
751
return X86EMUL_UNHANDLEABLE;
752
ctxt->mode = X86EMUL_MODE_VM86;
753
return X86EMUL_CONTINUE;
754
}
755
756
if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
757
return X86EMUL_UNHANDLEABLE;
758
759
if (efer & EFER_LMA) {
760
if (cs.l) {
761
/* Proper long mode */
762
ctxt->mode = X86EMUL_MODE_PROT64;
763
} else if (cs.d) {
764
/* 32-bit compatibility mode */
765
ctxt->mode = X86EMUL_MODE_PROT32;
766
} else {
767
ctxt->mode = X86EMUL_MODE_PROT16;
768
}
769
} else {
770
/* Legacy 32 bit / 16 bit mode */
771
ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
772
}
773
774
return X86EMUL_CONTINUE;
775
}
776
777
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
778
{
779
return assign_eip(ctxt, dst);
780
}
781
782
static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
783
{
784
int rc = emulator_recalc_and_set_mode(ctxt);
785
786
if (rc != X86EMUL_CONTINUE)
787
return rc;
788
789
return assign_eip(ctxt, dst);
790
}
791
792
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
793
{
794
return assign_eip_near(ctxt, ctxt->_eip + rel);
795
}
796
797
static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
798
void *data, unsigned size)
799
{
800
return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
801
}
802
803
static int linear_write_system(struct x86_emulate_ctxt *ctxt,
804
ulong linear, void *data,
805
unsigned int size)
806
{
807
return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
808
}
809
810
static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
811
struct segmented_address addr,
812
void *data,
813
unsigned size)
814
{
815
int rc;
816
ulong linear;
817
818
rc = linearize(ctxt, addr, size, false, &linear);
819
if (rc != X86EMUL_CONTINUE)
820
return rc;
821
return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
822
}
823
824
static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
825
struct segmented_address addr,
826
void *data,
827
unsigned int size)
828
{
829
int rc;
830
ulong linear;
831
832
rc = linearize(ctxt, addr, size, true, &linear);
833
if (rc != X86EMUL_CONTINUE)
834
return rc;
835
return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
836
}
837
838
/*
839
* Prefetch the remaining bytes of the instruction without crossing page
840
* boundary if they are not in fetch_cache yet.
841
*/
842
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
843
{
844
int rc;
845
unsigned size, max_size;
846
unsigned long linear;
847
int cur_size = ctxt->fetch.end - ctxt->fetch.data;
848
struct segmented_address addr = { .seg = VCPU_SREG_CS,
849
.ea = ctxt->eip + cur_size };
850
851
/*
852
* We do not know exactly how many bytes will be needed, and
853
* __linearize is expensive, so fetch as much as possible. We
854
* just have to avoid going beyond the 15 byte limit, the end
855
* of the segment, or the end of the page.
856
*
857
* __linearize is called with size 0 so that it does not do any
858
* boundary check itself. Instead, we use max_size to check
859
* against op_size.
860
*/
861
rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
862
X86EMUL_F_FETCH);
863
if (unlikely(rc != X86EMUL_CONTINUE))
864
return rc;
865
866
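/*
 * Note that cur_size never exceeds 15 (the maximum instruction length),
 * so "15UL ^ cur_size" below is simply 15 - cur_size: the number of
 * instruction bytes that may still legally be fetched.
 */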
size = min_t(unsigned, 15UL ^ cur_size, max_size);
867
size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
868
869
/*
870
* One instruction can only straddle two pages,
871
* and one has been loaded at the beginning of
872
* x86_decode_insn.  So, if there still are not enough
873
* bytes, we must have hit the 15-byte instruction limit.
874
*/
875
if (unlikely(size < op_size))
876
return emulate_gp(ctxt, 0);
877
878
rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
879
size, &ctxt->exception);
880
if (unlikely(rc != X86EMUL_CONTINUE))
881
return rc;
882
ctxt->fetch.end += size;
883
return X86EMUL_CONTINUE;
884
}
885
886
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
887
unsigned size)
888
{
889
unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
890
891
if (unlikely(done_size < size))
892
return __do_insn_fetch_bytes(ctxt, size - done_size);
893
else
894
return X86EMUL_CONTINUE;
895
}
896
897
/* Fetch next part of the instruction being emulated. */
898
#define insn_fetch(_type, _ctxt) \
899
({ _type _x; \
900
\
901
rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
902
if (rc != X86EMUL_CONTINUE) \
903
goto done; \
904
ctxt->_eip += sizeof(_type); \
905
memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
906
ctxt->fetch.ptr += sizeof(_type); \
907
_x; \
908
})
909
910
#define insn_fetch_arr(_arr, _size, _ctxt) \
911
({ \
912
rc = do_insn_fetch_bytes(_ctxt, _size); \
913
if (rc != X86EMUL_CONTINUE) \
914
goto done; \
915
ctxt->_eip += (_size); \
916
memcpy(_arr, ctxt->fetch.ptr, _size); \
917
ctxt->fetch.ptr += (_size); \
918
})
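/*
 * Both fetch helpers assume the caller declares "int rc" and provides a
 * "done:" label for the error path, as decode_modrm() and decode_abs()
 * below do.  A typical use is
 *
 *	ctxt->modrm = insn_fetch(u8, ctxt);
 *
 * which consumes one byte from the fetch cache and advances _eip.
 */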
919
920
/*
921
* Given the 'reg' portion of a ModRM byte, and a register block, return a
922
* pointer into the block that addresses the relevant register.
923
* AH, CH, DH and BH are decoded when @byteop is set and no REX prefix is present.
924
*/
925
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
926
int byteop)
927
{
928
void *p;
929
int highbyte_regs = (ctxt->rex_prefix == REX_NONE) && byteop;
930
931
if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
932
p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
933
else
934
p = reg_rmw(ctxt, modrm_reg);
935
return p;
936
}
937
938
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
939
struct segmented_address addr,
940
u16 *size, unsigned long *address, int op_bytes)
941
{
942
int rc;
943
944
if (op_bytes == 2)
945
op_bytes = 3;
946
*address = 0;
947
rc = segmented_read_std(ctxt, addr, size, 2);
948
if (rc != X86EMUL_CONTINUE)
949
return rc;
950
addr.ea += 2;
951
rc = segmented_read_std(ctxt, addr, address, op_bytes);
952
return rc;
953
}
954
955
EM_ASM_2(add);
956
EM_ASM_2(or);
957
EM_ASM_2(adc);
958
EM_ASM_2(sbb);
959
EM_ASM_2(and);
960
EM_ASM_2(sub);
961
EM_ASM_2(xor);
962
EM_ASM_2(cmp);
963
EM_ASM_2(test);
964
EM_ASM_2(xadd);
965
966
EM_ASM_1SRC2(mul, mul_ex);
967
EM_ASM_1SRC2(imul, imul_ex);
968
EM_ASM_1SRC2EX(div, div_ex);
969
EM_ASM_1SRC2EX(idiv, idiv_ex);
970
971
EM_ASM_3WCL(shld);
972
EM_ASM_3WCL(shrd);
973
974
EM_ASM_2W(imul);
975
976
EM_ASM_1(not);
977
EM_ASM_1(neg);
978
EM_ASM_1(inc);
979
EM_ASM_1(dec);
980
981
EM_ASM_2CL(rol);
982
EM_ASM_2CL(ror);
983
EM_ASM_2CL(rcl);
984
EM_ASM_2CL(rcr);
985
EM_ASM_2CL(shl);
986
EM_ASM_2CL(shr);
987
EM_ASM_2CL(sar);
988
989
EM_ASM_2W(bsf);
990
EM_ASM_2W(bsr);
991
EM_ASM_2W(bt);
992
EM_ASM_2W(bts);
993
EM_ASM_2W(btr);
994
EM_ASM_2W(btc);
995
996
EM_ASM_2R(cmp, cmp_r);
997
998
static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
999
{
1000
/* If src is zero, do not write back, but do update flags */
1001
if (ctxt->src.val == 0)
1002
ctxt->dst.type = OP_NONE;
1003
return em_bsf(ctxt);
1004
}
1005
1006
static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1007
{
1008
/* If src is zero, do not write back, but do update flags */
1009
if (ctxt->src.val == 0)
1010
ctxt->dst.type = OP_NONE;
1011
return em_bsr(ctxt);
1012
}
1013
1014
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1015
{
1016
return __emulate_cc(flags, condition & 0xf);
1017
}
1018
1019
static void fetch_register_operand(struct operand *op)
1020
{
1021
switch (op->bytes) {
1022
case 1:
1023
op->val = *(u8 *)op->addr.reg;
1024
break;
1025
case 2:
1026
op->val = *(u16 *)op->addr.reg;
1027
break;
1028
case 4:
1029
op->val = *(u32 *)op->addr.reg;
1030
break;
1031
case 8:
1032
op->val = *(u64 *)op->addr.reg;
1033
break;
1034
}
1035
op->orig_val = op->val;
1036
}
1037
1038
static int em_fninit(struct x86_emulate_ctxt *ctxt)
1039
{
1040
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1041
return emulate_nm(ctxt);
1042
1043
kvm_fpu_get();
1044
asm volatile("fninit");
1045
kvm_fpu_put();
1046
return X86EMUL_CONTINUE;
1047
}
1048
1049
static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1050
{
1051
u16 fcw;
1052
1053
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1054
return emulate_nm(ctxt);
1055
1056
kvm_fpu_get();
1057
asm volatile("fnstcw %0": "+m"(fcw));
1058
kvm_fpu_put();
1059
1060
ctxt->dst.val = fcw;
1061
1062
return X86EMUL_CONTINUE;
1063
}
1064
1065
static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1066
{
1067
u16 fsw;
1068
1069
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1070
return emulate_nm(ctxt);
1071
1072
kvm_fpu_get();
1073
asm volatile("fnstsw %0": "+m"(fsw));
1074
kvm_fpu_put();
1075
1076
ctxt->dst.val = fsw;
1077
1078
return X86EMUL_CONTINUE;
1079
}
1080
1081
static void __decode_register_operand(struct x86_emulate_ctxt *ctxt,
1082
struct operand *op, int reg)
1083
{
1084
if ((ctxt->d & Avx) && ctxt->op_bytes == 32) {
1085
op->type = OP_YMM;
1086
op->bytes = 32;
1087
op->addr.xmm = reg;
1088
kvm_read_avx_reg(reg, &op->vec_val2);
1089
return;
1090
}
1091
if (ctxt->d & (Avx|Sse)) {
1092
op->type = OP_XMM;
1093
op->bytes = 16;
1094
op->addr.xmm = reg;
1095
kvm_read_sse_reg(reg, &op->vec_val);
1096
return;
1097
}
1098
if (ctxt->d & Mmx) {
1099
reg &= 7;
1100
op->type = OP_MM;
1101
op->bytes = 8;
1102
op->addr.mm = reg;
1103
return;
1104
}
1105
1106
op->type = OP_REG;
1107
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1108
op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1109
fetch_register_operand(op);
1110
}
1111
1112
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1113
struct operand *op)
1114
{
1115
unsigned int reg;
1116
1117
if (ctxt->d & ModRM)
1118
reg = ctxt->modrm_reg;
1119
else
1120
reg = (ctxt->b & 7) | (ctxt->rex_bits & REX_B ? 8 : 0);
1121
1122
__decode_register_operand(ctxt, op, reg);
1123
}
1124
1125
static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1126
{
1127
if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1128
ctxt->modrm_seg = VCPU_SREG_SS;
1129
}
1130
1131
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1132
struct operand *op)
1133
{
1134
u8 sib;
1135
int index_reg, base_reg, scale;
1136
int rc = X86EMUL_CONTINUE;
1137
ulong modrm_ea = 0;
1138
1139
ctxt->modrm_reg = (ctxt->rex_bits & REX_R ? 8 : 0);
1140
index_reg = (ctxt->rex_bits & REX_X ? 8 : 0);
1141
base_reg = (ctxt->rex_bits & REX_B ? 8 : 0);
1142
1143
ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1144
ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1145
ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1146
ctxt->modrm_seg = VCPU_SREG_DS;
1147
1148
if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1149
__decode_register_operand(ctxt, op, ctxt->modrm_rm);
1150
return rc;
1151
}
1152
1153
op->type = OP_MEM;
1154
1155
if (ctxt->ad_bytes == 2) {
1156
unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1157
unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1158
unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1159
unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1160
1161
/* 16-bit ModR/M decode. */
1162
switch (ctxt->modrm_mod) {
1163
case 0:
1164
if (ctxt->modrm_rm == 6)
1165
modrm_ea += insn_fetch(u16, ctxt);
1166
break;
1167
case 1:
1168
modrm_ea += insn_fetch(s8, ctxt);
1169
break;
1170
case 2:
1171
modrm_ea += insn_fetch(u16, ctxt);
1172
break;
1173
}
1174
switch (ctxt->modrm_rm) {
1175
case 0:
1176
modrm_ea += bx + si;
1177
break;
1178
case 1:
1179
modrm_ea += bx + di;
1180
break;
1181
case 2:
1182
modrm_ea += bp + si;
1183
break;
1184
case 3:
1185
modrm_ea += bp + di;
1186
break;
1187
case 4:
1188
modrm_ea += si;
1189
break;
1190
case 5:
1191
modrm_ea += di;
1192
break;
1193
case 6:
1194
if (ctxt->modrm_mod != 0)
1195
modrm_ea += bp;
1196
break;
1197
case 7:
1198
modrm_ea += bx;
1199
break;
1200
}
1201
if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1202
(ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1203
ctxt->modrm_seg = VCPU_SREG_SS;
1204
modrm_ea = (u16)modrm_ea;
1205
} else {
1206
/* 32/64-bit ModR/M decode. */
1207
if ((ctxt->modrm_rm & 7) == 4) {
1208
sib = insn_fetch(u8, ctxt);
1209
index_reg |= (sib >> 3) & 7;
1210
base_reg |= sib & 7;
1211
scale = sib >> 6;
1212
1213
if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1214
modrm_ea += insn_fetch(s32, ctxt);
1215
else {
1216
modrm_ea += reg_read(ctxt, base_reg);
1217
adjust_modrm_seg(ctxt, base_reg);
1218
/* Increment ESP on POP [ESP] */
1219
if ((ctxt->d & IncSP) &&
1220
base_reg == VCPU_REGS_RSP)
1221
modrm_ea += ctxt->op_bytes;
1222
}
1223
if (index_reg != 4)
1224
modrm_ea += reg_read(ctxt, index_reg) << scale;
1225
} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1226
modrm_ea += insn_fetch(s32, ctxt);
1227
if (ctxt->mode == X86EMUL_MODE_PROT64)
1228
ctxt->rip_relative = 1;
1229
} else {
1230
base_reg = ctxt->modrm_rm;
1231
modrm_ea += reg_read(ctxt, base_reg);
1232
adjust_modrm_seg(ctxt, base_reg);
1233
}
1234
switch (ctxt->modrm_mod) {
1235
case 1:
1236
modrm_ea += insn_fetch(s8, ctxt);
1237
break;
1238
case 2:
1239
modrm_ea += insn_fetch(s32, ctxt);
1240
break;
1241
}
1242
}
1243
op->addr.mem.ea = modrm_ea;
1244
if (ctxt->ad_bytes != 8)
1245
ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1246
1247
done:
1248
return rc;
1249
}
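/*
 * Worked example for the 16-bit path above: modrm = 0x46 gives mod = 1,
 * reg = 0, rm = 6, so the effective address is BP plus a sign-extended
 * 8-bit displacement, and modrm_seg defaults to SS instead of DS.
 */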
1250
1251
static int decode_abs(struct x86_emulate_ctxt *ctxt,
1252
struct operand *op)
1253
{
1254
int rc = X86EMUL_CONTINUE;
1255
1256
op->type = OP_MEM;
1257
switch (ctxt->ad_bytes) {
1258
case 2:
1259
op->addr.mem.ea = insn_fetch(u16, ctxt);
1260
break;
1261
case 4:
1262
op->addr.mem.ea = insn_fetch(u32, ctxt);
1263
break;
1264
case 8:
1265
op->addr.mem.ea = insn_fetch(u64, ctxt);
1266
break;
1267
}
1268
done:
1269
return rc;
1270
}
1271
1272
static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1273
{
1274
long sv = 0, mask;
1275
1276
if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1277
mask = ~((long)ctxt->dst.bytes * 8 - 1);
1278
1279
if (ctxt->src.bytes == 2)
1280
sv = (s16)ctxt->src.val & (s16)mask;
1281
else if (ctxt->src.bytes == 4)
1282
sv = (s32)ctxt->src.val & (s32)mask;
1283
else
1284
sv = (s64)ctxt->src.val & (s64)mask;
1285
1286
ctxt->dst.addr.mem.ea = address_mask(ctxt,
1287
ctxt->dst.addr.mem.ea + (sv >> 3));
1288
}
1289
1290
/* only subword offset */
1291
ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1292
}
1293
1294
static int read_emulated(struct x86_emulate_ctxt *ctxt,
1295
unsigned long addr, void *dest, unsigned size)
1296
{
1297
int rc;
1298
struct read_cache *mc = &ctxt->mem_read;
1299
1300
if (mc->pos < mc->end)
1301
goto read_cached;
1302
1303
if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
1304
return X86EMUL_UNHANDLEABLE;
1305
1306
rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1307
&ctxt->exception);
1308
if (rc != X86EMUL_CONTINUE)
1309
return rc;
1310
1311
mc->end += size;
1312
1313
read_cached:
1314
memcpy(dest, mc->data + mc->pos, size);
1315
mc->pos += size;
1316
return X86EMUL_CONTINUE;
1317
}
1318
1319
static int segmented_read(struct x86_emulate_ctxt *ctxt,
1320
struct segmented_address addr,
1321
void *data,
1322
unsigned size)
1323
{
1324
int rc;
1325
ulong linear;
1326
1327
rc = linearize(ctxt, addr, size, false, &linear);
1328
if (rc != X86EMUL_CONTINUE)
1329
return rc;
1330
return read_emulated(ctxt, linear, data, size);
1331
}
1332
1333
static int segmented_write(struct x86_emulate_ctxt *ctxt,
1334
struct segmented_address addr,
1335
const void *data,
1336
unsigned size)
1337
{
1338
int rc;
1339
ulong linear;
1340
1341
rc = linearize(ctxt, addr, size, true, &linear);
1342
if (rc != X86EMUL_CONTINUE)
1343
return rc;
1344
return ctxt->ops->write_emulated(ctxt, linear, data, size,
1345
&ctxt->exception);
1346
}
1347
1348
static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1349
struct segmented_address addr,
1350
const void *orig_data, const void *data,
1351
unsigned size)
1352
{
1353
int rc;
1354
ulong linear;
1355
1356
rc = linearize(ctxt, addr, size, true, &linear);
1357
if (rc != X86EMUL_CONTINUE)
1358
return rc;
1359
return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1360
size, &ctxt->exception);
1361
}
1362
1363
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1364
unsigned int size, unsigned short port,
1365
void *dest)
1366
{
1367
struct read_cache *rc = &ctxt->io_read;
1368
1369
if (rc->pos == rc->end) { /* refill pio read ahead */
1370
unsigned int in_page, n;
1371
unsigned int count = ctxt->rep_prefix ?
1372
address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1373
in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1374
offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1375
PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1376
n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1377
if (n == 0)
1378
n = 1;
1379
rc->pos = rc->end = 0;
1380
if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1381
return 0;
1382
rc->end = n * size;
1383
}
1384
1385
if (ctxt->rep_prefix && (ctxt->d & String) &&
1386
!(ctxt->eflags & X86_EFLAGS_DF)) {
1387
ctxt->dst.data = rc->data + rc->pos;
1388
ctxt->dst.type = OP_MEM_STR;
1389
ctxt->dst.count = (rc->end - rc->pos) / size;
1390
rc->pos = rc->end;
1391
} else {
1392
memcpy(dest, rc->data + rc->pos, size);
1393
rc->pos += size;
1394
}
1395
return 1;
1396
}
1397
1398
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1399
u16 index, struct desc_struct *desc)
1400
{
1401
struct desc_ptr dt;
1402
ulong addr;
1403
1404
ctxt->ops->get_idt(ctxt, &dt);
1405
1406
if (dt.size < index * 8 + 7)
1407
return emulate_gp(ctxt, index << 3 | 0x2);
1408
1409
addr = dt.address + index * 8;
1410
return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1411
}
1412
1413
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1414
u16 selector, struct desc_ptr *dt)
1415
{
1416
const struct x86_emulate_ops *ops = ctxt->ops;
1417
u32 base3 = 0;
1418
1419
if (selector & 1 << 2) {
1420
struct desc_struct desc;
1421
u16 sel;
1422
1423
memset(dt, 0, sizeof(*dt));
1424
if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1425
VCPU_SREG_LDTR))
1426
return;
1427
1428
dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1429
dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1430
} else
1431
ops->get_gdt(ctxt, dt);
1432
}
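/*
 * For example, selector 0x002b has index 5 (0x2b >> 3), RPL 3 and bit 2
 * (TI) clear, so the descriptor is looked up in the GDT; a selector with
 * TI set is instead resolved against the table described by LDTR above.
 */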
1433
1434
static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1435
u16 selector, ulong *desc_addr_p)
1436
{
1437
struct desc_ptr dt;
1438
u16 index = selector >> 3;
1439
ulong addr;
1440
1441
get_descriptor_table_ptr(ctxt, selector, &dt);
1442
1443
if (dt.size < index * 8 + 7)
1444
return emulate_gp(ctxt, selector & 0xfffc);
1445
1446
addr = dt.address + index * 8;
1447
1448
#ifdef CONFIG_X86_64
1449
if (addr >> 32 != 0) {
1450
u64 efer = 0;
1451
1452
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1453
if (!(efer & EFER_LMA))
1454
addr &= (u32)-1;
1455
}
1456
#endif
1457
1458
*desc_addr_p = addr;
1459
return X86EMUL_CONTINUE;
1460
}
1461
1462
/* allowed just for 8-byte segment descriptors */
1463
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1464
u16 selector, struct desc_struct *desc,
1465
ulong *desc_addr_p)
1466
{
1467
int rc;
1468
1469
rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1470
if (rc != X86EMUL_CONTINUE)
1471
return rc;
1472
1473
return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1474
}
1475
1476
/* allowed just for 8-byte segment descriptors */
1477
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1478
u16 selector, struct desc_struct *desc)
1479
{
1480
int rc;
1481
ulong addr;
1482
1483
rc = get_descriptor_ptr(ctxt, selector, &addr);
1484
if (rc != X86EMUL_CONTINUE)
1485
return rc;
1486
1487
return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1488
}
1489
1490
static bool emulator_is_ssp_invalid(struct x86_emulate_ctxt *ctxt, u8 cpl)
1491
{
1492
const u32 MSR_IA32_X_CET = cpl == 3 ? MSR_IA32_U_CET : MSR_IA32_S_CET;
1493
u64 efer = 0, cet = 0, ssp = 0;
1494
1495
if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET))
1496
return false;
1497
1498
if (ctxt->ops->get_msr(ctxt, MSR_EFER, &efer))
1499
return true;
1500
1501
/* SSP is guaranteed to be valid if the vCPU was already in 32-bit mode. */
1502
if (!(efer & EFER_LMA))
1503
return false;
1504
1505
if (ctxt->ops->get_msr(ctxt, MSR_IA32_X_CET, &cet))
1506
return true;
1507
1508
if (!(cet & CET_SHSTK_EN))
1509
return false;
1510
1511
if (ctxt->ops->get_msr(ctxt, MSR_KVM_INTERNAL_GUEST_SSP, &ssp))
1512
return true;
1513
1514
/*
1515
* On transfer from 64-bit mode to compatibility mode, SSP[63:32] must
1516
* be 0, i.e. SSP must be a 32-bit value outside of 64-bit mode.
1517
*/
1518
return ssp >> 32;
1519
}
1520
1521
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1522
u16 selector, int seg, u8 cpl,
1523
enum x86_transfer_type transfer,
1524
struct desc_struct *desc)
1525
{
1526
struct desc_struct seg_desc, old_desc;
1527
u8 dpl, rpl;
1528
unsigned err_vec = GP_VECTOR;
1529
u32 err_code = 0;
1530
bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1531
ulong desc_addr;
1532
int ret;
1533
u16 dummy;
1534
u32 base3 = 0;
1535
1536
memset(&seg_desc, 0, sizeof(seg_desc));
1537
1538
if (ctxt->mode == X86EMUL_MODE_REAL) {
1539
/* set real mode segment descriptor (keep limit etc. for
1540
* unreal mode) */
1541
ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1542
set_desc_base(&seg_desc, selector << 4);
1543
goto load;
1544
} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1545
/* VM86 needs a clean new segment descriptor */
1546
set_desc_base(&seg_desc, selector << 4);
1547
set_desc_limit(&seg_desc, 0xffff);
1548
seg_desc.type = 3;
1549
seg_desc.p = 1;
1550
seg_desc.s = 1;
1551
seg_desc.dpl = 3;
1552
goto load;
1553
}
1554
1555
rpl = selector & 3;
1556
1557
/* TR should be in GDT only */
1558
if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1559
goto exception;
1560
1561
/* NULL selector is not valid for TR, CS and (except for long mode) SS */
1562
if (null_selector) {
1563
if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1564
goto exception;
1565
1566
if (seg == VCPU_SREG_SS) {
1567
if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1568
goto exception;
1569
1570
/*
1571
* ctxt->ops->set_segment expects the CPL to be in
1572
* SS.DPL, so fake an expand-up 32-bit data segment.
1573
*/
1574
seg_desc.type = 3;
1575
seg_desc.p = 1;
1576
seg_desc.s = 1;
1577
seg_desc.dpl = cpl;
1578
seg_desc.d = 1;
1579
seg_desc.g = 1;
1580
}
1581
1582
/* Skip all following checks */
1583
goto load;
1584
}
1585
1586
ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1587
if (ret != X86EMUL_CONTINUE)
1588
return ret;
1589
1590
err_code = selector & 0xfffc;
1591
err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1592
GP_VECTOR;
1593
1594
/* can't load system descriptor into segment selector */
1595
if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1596
if (transfer == X86_TRANSFER_CALL_JMP)
1597
return X86EMUL_UNHANDLEABLE;
1598
goto exception;
1599
}
1600
1601
dpl = seg_desc.dpl;
1602
1603
switch (seg) {
1604
case VCPU_SREG_SS:
1605
/*
1606
* segment is not a writable data segment or segment
1607
* selector's RPL != CPL or DPL != CPL
1608
*/
1609
if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1610
goto exception;
1611
break;
1612
case VCPU_SREG_CS:
1613
/*
1614
* KVM uses "none" when loading CS as part of emulating Real
1615
* Mode exceptions and IRET (handled above). In all other
1616
* cases, loading CS without a control transfer is a KVM bug.
1617
*/
1618
if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
1619
goto exception;
1620
1621
if (!(seg_desc.type & 8))
1622
goto exception;
1623
1624
if (transfer == X86_TRANSFER_RET) {
1625
/* RET can never return to an inner privilege level. */
1626
if (rpl < cpl)
1627
goto exception;
1628
/* Outer-privilege level return is not implemented */
1629
if (rpl > cpl)
1630
return X86EMUL_UNHANDLEABLE;
1631
}
1632
if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
1633
if (seg_desc.type & 4) {
1634
/* conforming */
1635
if (dpl > rpl)
1636
goto exception;
1637
} else {
1638
/* nonconforming */
1639
if (dpl != rpl)
1640
goto exception;
1641
}
1642
} else { /* X86_TRANSFER_CALL_JMP */
1643
if (seg_desc.type & 4) {
1644
/* conforming */
1645
if (dpl > cpl)
1646
goto exception;
1647
} else {
1648
/* nonconforming */
1649
if (rpl > cpl || dpl != cpl)
1650
goto exception;
1651
}
1652
}
1653
/* in long-mode d/b must be clear if l is set */
1654
if (seg_desc.d && seg_desc.l) {
1655
u64 efer = 0;
1656
1657
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1658
if (efer & EFER_LMA)
1659
goto exception;
1660
}
1661
if (!seg_desc.l && emulator_is_ssp_invalid(ctxt, cpl)) {
1662
err_code = 0;
1663
goto exception;
1664
}
1665
1666
/* CS(RPL) <- CPL */
1667
selector = (selector & 0xfffc) | cpl;
1668
break;
1669
case VCPU_SREG_TR:
1670
if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1671
goto exception;
1672
break;
1673
case VCPU_SREG_LDTR:
1674
if (seg_desc.s || seg_desc.type != 2)
1675
goto exception;
1676
break;
1677
default: /* DS, ES, FS, or GS */
1678
/*
1679
* segment is not a data or readable code segment or
1680
* ((segment is a data or nonconforming code segment)
1681
* and ((RPL > DPL) or (CPL > DPL)))
1682
*/
1683
if ((seg_desc.type & 0xa) == 0x8 ||
1684
(((seg_desc.type & 0xc) != 0xc) &&
1685
(rpl > dpl || cpl > dpl)))
1686
goto exception;
1687
break;
1688
}
1689
1690
if (!seg_desc.p) {
1691
err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1692
goto exception;
1693
}
1694
1695
if (seg_desc.s) {
1696
/* mark segment as accessed */
1697
if (!(seg_desc.type & 1)) {
1698
seg_desc.type |= 1;
1699
ret = write_segment_descriptor(ctxt, selector,
1700
&seg_desc);
1701
if (ret != X86EMUL_CONTINUE)
1702
return ret;
1703
}
1704
} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1705
ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1706
if (ret != X86EMUL_CONTINUE)
1707
return ret;
1708
if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1709
((u64)base3 << 32), ctxt,
1710
X86EMUL_F_DT_LOAD))
1711
return emulate_gp(ctxt, err_code);
1712
}
1713
1714
if (seg == VCPU_SREG_TR) {
1715
old_desc = seg_desc;
1716
seg_desc.type |= 2; /* busy */
1717
ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1718
sizeof(seg_desc), &ctxt->exception);
1719
if (ret != X86EMUL_CONTINUE)
1720
return ret;
1721
}
1722
load:
1723
ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1724
if (desc)
1725
*desc = seg_desc;
1726
return X86EMUL_CONTINUE;
1727
exception:
1728
return emulate_exception(ctxt, err_vec, err_code, true);
1729
}
1730
1731
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1732
u16 selector, int seg)
1733
{
1734
u8 cpl = ctxt->ops->cpl(ctxt);
1735
1736
/*
1737
* None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1738
* they can load it at CPL<3 (Intel's manual says only LSS can,
1739
* but it's wrong).
1740
*
1741
* However, the Intel manual says that putting IST=1/DPL=3 in
1742
* an interrupt gate will result in SS=3 (the AMD manual instead
1743
* says it doesn't), so allow SS=3 in __load_segment_descriptor
1744
* and only forbid it here.
1745
*/
1746
if (seg == VCPU_SREG_SS && selector == 3 &&
1747
ctxt->mode == X86EMUL_MODE_PROT64)
1748
return emulate_exception(ctxt, GP_VECTOR, 0, true);
1749
1750
return __load_segment_descriptor(ctxt, selector, seg, cpl,
1751
X86_TRANSFER_NONE, NULL);
1752
}
1753
1754
static void write_register_operand(struct operand *op)
1755
{
1756
return assign_register(op->addr.reg, op->val, op->bytes);
1757
}
1758
1759
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1760
{
1761
switch (op->type) {
1762
case OP_REG:
1763
write_register_operand(op);
1764
break;
1765
case OP_MEM:
1766
if (ctxt->lock_prefix)
1767
return segmented_cmpxchg(ctxt,
1768
op->addr.mem,
1769
&op->orig_val,
1770
&op->val,
1771
op->bytes);
1772
else
1773
return segmented_write(ctxt,
1774
op->addr.mem,
1775
&op->val,
1776
op->bytes);
1777
case OP_MEM_STR:
1778
return segmented_write(ctxt,
1779
op->addr.mem,
1780
op->data,
1781
op->bytes * op->count);
1782
case OP_XMM:
1783
if (!(ctxt->d & Avx)) {
1784
kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1785
break;
1786
}
1787
/* full YMM write but with high bytes cleared */
1788
memset(op->valptr + 16, 0, 16);
1789
fallthrough;
1790
case OP_YMM:
1791
kvm_write_avx_reg(op->addr.xmm, &op->vec_val2);
1792
break;
1793
case OP_MM:
1794
kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1795
break;
1796
case OP_NONE:
1797
/* no writeback */
1798
break;
1799
default:
1800
break;
1801
}
1802
return X86EMUL_CONTINUE;
1803
}
1804
1805
static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len)
1806
{
1807
struct segmented_address addr;
1808
1809
rsp_increment(ctxt, -len);
1810
addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1811
addr.seg = VCPU_SREG_SS;
1812
1813
return segmented_write(ctxt, addr, data, len);
1814
}
1815
1816
static int em_push(struct x86_emulate_ctxt *ctxt)
1817
{
1818
/* Disable writeback. */
1819
ctxt->dst.type = OP_NONE;
1820
return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1821
}
1822
1823
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1824
void *dest, int len)
1825
{
1826
int rc;
1827
struct segmented_address addr;
1828
1829
addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1830
addr.seg = VCPU_SREG_SS;
1831
rc = segmented_read(ctxt, addr, dest, len);
1832
if (rc != X86EMUL_CONTINUE)
1833
return rc;
1834
1835
rsp_increment(ctxt, len);
1836
return rc;
1837
}
1838
1839
static int em_pop(struct x86_emulate_ctxt *ctxt)
1840
{
1841
return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1842
}
1843
1844
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1845
void *dest, int len)
1846
{
1847
int rc;
1848
unsigned long val = 0;
1849
unsigned long change_mask;
1850
int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1851
int cpl = ctxt->ops->cpl(ctxt);
1852
1853
rc = emulate_pop(ctxt, &val, len);
1854
if (rc != X86EMUL_CONTINUE)
1855
return rc;
1856
1857
change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1858
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1859
X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1860
X86_EFLAGS_AC | X86_EFLAGS_ID;
1861
1862
switch(ctxt->mode) {
1863
case X86EMUL_MODE_PROT64:
1864
case X86EMUL_MODE_PROT32:
1865
case X86EMUL_MODE_PROT16:
1866
if (cpl == 0)
1867
change_mask |= X86_EFLAGS_IOPL;
1868
if (cpl <= iopl)
1869
change_mask |= X86_EFLAGS_IF;
1870
break;
1871
case X86EMUL_MODE_VM86:
1872
if (iopl < 3)
1873
return emulate_gp(ctxt, 0);
1874
change_mask |= X86_EFLAGS_IF;
1875
break;
1876
default: /* real mode */
1877
change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1878
break;
1879
}
1880
1881
*(unsigned long *)dest =
1882
(ctxt->eflags & ~change_mask) | (val & change_mask);
1883
1884
return rc;
1885
}
1886
1887
static int em_popf(struct x86_emulate_ctxt *ctxt)
1888
{
1889
ctxt->dst.type = OP_REG;
1890
ctxt->dst.addr.reg = &ctxt->eflags;
1891
ctxt->dst.bytes = ctxt->op_bytes;
1892
return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1893
}
1894
1895
static int em_enter(struct x86_emulate_ctxt *ctxt)
1896
{
1897
int rc;
1898
unsigned frame_size = ctxt->src.val;
1899
unsigned nesting_level = ctxt->src2.val & 31;
1900
ulong rbp;
1901
1902
if (nesting_level)
1903
return X86EMUL_UNHANDLEABLE;
1904
1905
rbp = reg_read(ctxt, VCPU_REGS_RBP);
1906
rc = emulate_push(ctxt, &rbp, stack_size(ctxt));
1907
if (rc != X86EMUL_CONTINUE)
1908
return rc;
1909
assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1910
stack_mask(ctxt));
1911
assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1912
reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1913
stack_mask(ctxt));
1914
return X86EMUL_CONTINUE;
1915
}
1916
1917
static int em_leave(struct x86_emulate_ctxt *ctxt)
1918
{
1919
assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1920
stack_mask(ctxt));
1921
return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1922
}
1923
1924
static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1925
{
1926
int seg = ctxt->src2.val;
1927
1928
ctxt->src.val = get_segment_selector(ctxt, seg);
1929
if (ctxt->op_bytes == 4) {
1930
rsp_increment(ctxt, -2);
1931
ctxt->op_bytes = 2;
1932
}
1933
1934
return em_push(ctxt);
1935
}
1936
1937
static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1938
{
1939
int seg = ctxt->src2.val;
1940
unsigned long selector = 0;
1941
int rc;
1942
1943
rc = emulate_pop(ctxt, &selector, 2);
1944
if (rc != X86EMUL_CONTINUE)
1945
return rc;
1946
1947
if (seg == VCPU_SREG_SS)
1948
ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1949
if (ctxt->op_bytes > 2)
1950
rsp_increment(ctxt, ctxt->op_bytes - 2);
1951
1952
rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1953
return rc;
1954
}
1955
1956
static int em_pusha(struct x86_emulate_ctxt *ctxt)
1957
{
1958
unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1959
int rc = X86EMUL_CONTINUE;
1960
int reg = VCPU_REGS_RAX;
1961
1962
while (reg <= VCPU_REGS_RDI) {
1963
(reg == VCPU_REGS_RSP) ?
1964
(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
1965
1966
rc = em_push(ctxt);
1967
if (rc != X86EMUL_CONTINUE)
1968
return rc;
1969
1970
++reg;
1971
}
1972
1973
return rc;
1974
}
1975
1976
static int em_pushf(struct x86_emulate_ctxt *ctxt)
1977
{
1978
ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1979
return em_push(ctxt);
1980
}
1981
1982
static int em_popa(struct x86_emulate_ctxt *ctxt)
1983
{
1984
int rc = X86EMUL_CONTINUE;
1985
int reg = VCPU_REGS_RDI;
1986
u32 val = 0;
1987
1988
while (reg >= VCPU_REGS_RAX) {
1989
if (reg == VCPU_REGS_RSP) {
1990
rsp_increment(ctxt, ctxt->op_bytes);
1991
--reg;
1992
}
1993
1994
rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
1995
if (rc != X86EMUL_CONTINUE)
1996
break;
1997
assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
1998
--reg;
1999
}
2000
return rc;
2001
}
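/*
 * Note: PUSHA pushes AX, CX, DX, BX, the pre-PUSHA SP, BP, SI and DI
 * in that order; POPA pops them in reverse but discards the saved SP
 * slot, which is why the RSP case above only bumps the stack pointer
 * instead of loading a value into it.
 */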
2002
2003
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2004
{
2005
const struct x86_emulate_ops *ops = ctxt->ops;
2006
int rc;
2007
struct desc_ptr dt;
2008
gva_t cs_addr;
2009
gva_t eip_addr;
2010
u16 cs, eip;
2011
2012
/* TODO: Add limit checks */
2013
ctxt->src.val = ctxt->eflags;
2014
rc = em_push(ctxt);
2015
if (rc != X86EMUL_CONTINUE)
2016
return rc;
2017
2018
ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2019
2020
ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2021
rc = em_push(ctxt);
2022
if (rc != X86EMUL_CONTINUE)
2023
return rc;
2024
2025
ctxt->src.val = ctxt->_eip;
2026
rc = em_push(ctxt);
2027
if (rc != X86EMUL_CONTINUE)
2028
return rc;
2029
2030
ops->get_idt(ctxt, &dt);
2031
2032
eip_addr = dt.address + (irq << 2);
2033
cs_addr = dt.address + (irq << 2) + 2;
2034
2035
rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2036
if (rc != X86EMUL_CONTINUE)
2037
return rc;
2038
2039
rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2040
if (rc != X86EMUL_CONTINUE)
2041
return rc;
2042
2043
rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2044
if (rc != X86EMUL_CONTINUE)
2045
return rc;
2046
2047
ctxt->_eip = eip;
2048
2049
return rc;
2050
}
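/*
 * Real-mode IVT layout behind the (irq << 2) arithmetic: each vector
 * is a 4-byte far pointer with IP in bytes 0-1 and CS in bytes 2-3,
 * so e.g. vector 0x10 is read from dt.address + 0x40 (IP) and
 * dt.address + 0x42 (CS).
 */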
2051
2052
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2053
{
2054
int rc;
2055
2056
invalidate_registers(ctxt);
2057
rc = __emulate_int_real(ctxt, irq);
2058
if (rc == X86EMUL_CONTINUE)
2059
writeback_registers(ctxt);
2060
return rc;
2061
}
2062
2063
static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2064
{
2065
switch(ctxt->mode) {
2066
case X86EMUL_MODE_REAL:
2067
return __emulate_int_real(ctxt, irq);
2068
case X86EMUL_MODE_VM86:
2069
case X86EMUL_MODE_PROT16:
2070
case X86EMUL_MODE_PROT32:
2071
case X86EMUL_MODE_PROT64:
2072
default:
2073
/* Protected mode interrupts are not implemented yet */
2074
return X86EMUL_UNHANDLEABLE;
2075
}
2076
}
2077
2078
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2079
{
2080
int rc = X86EMUL_CONTINUE;
2081
unsigned long temp_eip = 0;
2082
unsigned long temp_eflags = 0;
2083
unsigned long cs = 0;
2084
unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2085
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2086
X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2087
X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2088
X86_EFLAGS_AC | X86_EFLAGS_ID |
2089
X86_EFLAGS_FIXED;
2090
unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2091
X86_EFLAGS_VIP;
2092
2093
/* TODO: Add stack limit check */
2094
2095
rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2096
2097
if (rc != X86EMUL_CONTINUE)
2098
return rc;
2099
2100
if (temp_eip & ~0xffff)
2101
return emulate_gp(ctxt, 0);
2102
2103
rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2104
2105
if (rc != X86EMUL_CONTINUE)
2106
return rc;
2107
2108
rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2109
2110
if (rc != X86EMUL_CONTINUE)
2111
return rc;
2112
2113
rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2114
2115
if (rc != X86EMUL_CONTINUE)
2116
return rc;
2117
2118
ctxt->_eip = temp_eip;
2119
2120
if (ctxt->op_bytes == 4)
2121
ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2122
else if (ctxt->op_bytes == 2) {
2123
ctxt->eflags &= ~0xffff;
2124
ctxt->eflags |= temp_eflags;
2125
}
2126
2127
ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2128
ctxt->eflags |= X86_EFLAGS_FIXED;
2129
ctxt->ops->set_nmi_mask(ctxt, false);
2130
2131
return rc;
2132
}
2133
2134
static int em_iret(struct x86_emulate_ctxt *ctxt)
2135
{
2136
switch(ctxt->mode) {
2137
case X86EMUL_MODE_REAL:
2138
return emulate_iret_real(ctxt);
2139
case X86EMUL_MODE_VM86:
2140
case X86EMUL_MODE_PROT16:
2141
case X86EMUL_MODE_PROT32:
2142
case X86EMUL_MODE_PROT64:
2143
default:
2144
/* iret from protected mode is not implemented yet */
2145
return X86EMUL_UNHANDLEABLE;
2146
}
2147
}
2148
2149
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2150
{
2151
int rc;
2152
unsigned short sel;
2153
struct desc_struct new_desc;
2154
u8 cpl = ctxt->ops->cpl(ctxt);
2155
2156
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2157
2158
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2159
X86_TRANSFER_CALL_JMP,
2160
&new_desc);
2161
if (rc != X86EMUL_CONTINUE)
2162
return rc;
2163
2164
rc = assign_eip_far(ctxt, ctxt->src.val);
2165
/* Error handling is not implemented. */
2166
if (rc != X86EMUL_CONTINUE)
2167
return X86EMUL_UNHANDLEABLE;
2168
2169
return rc;
2170
}
2171
2172
static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2173
{
2174
return assign_eip_near(ctxt, ctxt->src.val);
2175
}
2176
2177
static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2178
{
2179
int rc;
2180
long int old_eip;
2181
2182
old_eip = ctxt->_eip;
2183
rc = assign_eip_near(ctxt, ctxt->src.val);
2184
if (rc != X86EMUL_CONTINUE)
2185
return rc;
2186
ctxt->src.val = old_eip;
2187
rc = em_push(ctxt);
2188
return rc;
2189
}
2190
2191
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2192
{
2193
u64 old = ctxt->dst.orig_val64;
2194
2195
if (ctxt->dst.bytes == 16)
2196
return X86EMUL_UNHANDLEABLE;
2197
2198
if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2199
((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2200
*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2201
*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2202
ctxt->eflags &= ~X86_EFLAGS_ZF;
2203
} else {
2204
ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2205
(u32) reg_read(ctxt, VCPU_REGS_RBX);
2206
2207
ctxt->eflags |= X86_EFLAGS_ZF;
2208
}
2209
return X86EMUL_CONTINUE;
2210
}
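/*
 * CMPXCHG8B semantics implemented above: if EDX:EAX equals the 64-bit
 * memory operand, ZF is set and ECX:EBX is written back; otherwise ZF
 * is cleared and the memory value is loaded into EDX:EAX. The 16-byte
 * variant is CMPXCHG16B, which this helper deliberately rejects.
 */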
2211
2212
static int em_ret(struct x86_emulate_ctxt *ctxt)
2213
{
2214
int rc;
2215
unsigned long eip = 0;
2216
2217
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2218
if (rc != X86EMUL_CONTINUE)
2219
return rc;
2220
2221
return assign_eip_near(ctxt, eip);
2222
}
2223
2224
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2225
{
2226
int rc;
2227
unsigned long eip = 0;
2228
unsigned long cs = 0;
2229
int cpl = ctxt->ops->cpl(ctxt);
2230
struct desc_struct new_desc;
2231
2232
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2233
if (rc != X86EMUL_CONTINUE)
2234
return rc;
2235
rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2236
if (rc != X86EMUL_CONTINUE)
2237
return rc;
2238
rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2239
X86_TRANSFER_RET,
2240
&new_desc);
2241
if (rc != X86EMUL_CONTINUE)
2242
return rc;
2243
rc = assign_eip_far(ctxt, eip);
2244
/* Error handling is not implemented. */
2245
if (rc != X86EMUL_CONTINUE)
2246
return X86EMUL_UNHANDLEABLE;
2247
2248
return rc;
2249
}
2250
2251
static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2252
{
2253
int rc;
2254
2255
rc = em_ret_far(ctxt);
2256
if (rc != X86EMUL_CONTINUE)
2257
return rc;
2258
rsp_increment(ctxt, ctxt->src.val);
2259
return X86EMUL_CONTINUE;
2260
}
2261
2262
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2263
{
2264
/* Save real source value, then compare EAX against destination. */
2265
ctxt->dst.orig_val = ctxt->dst.val;
2266
ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2267
ctxt->src.orig_val = ctxt->src.val;
2268
ctxt->src.val = ctxt->dst.orig_val;
2269
em_cmp(ctxt);
2270
2271
if (ctxt->eflags & X86_EFLAGS_ZF) {
2272
/* Success: write back to memory; no update of EAX */
2273
ctxt->src.type = OP_NONE;
2274
ctxt->dst.val = ctxt->src.orig_val;
2275
} else {
2276
/* Failure: write the value we saw to EAX. */
2277
ctxt->src.type = OP_REG;
2278
ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2279
ctxt->src.val = ctxt->dst.orig_val;
2280
/* Create write-cycle to dest by writing the same value */
2281
ctxt->dst.val = ctxt->dst.orig_val;
2282
}
2283
return X86EMUL_CONTINUE;
2284
}
2285
2286
static int em_lseg(struct x86_emulate_ctxt *ctxt)
2287
{
2288
int seg = ctxt->src2.val;
2289
unsigned short sel;
2290
int rc;
2291
2292
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2293
2294
rc = load_segment_descriptor(ctxt, sel, seg);
2295
if (rc != X86EMUL_CONTINUE)
2296
return rc;
2297
2298
ctxt->dst.val = ctxt->src.val;
2299
return rc;
2300
}
2301
2302
static int em_rsm(struct x86_emulate_ctxt *ctxt)
2303
{
2304
if (!ctxt->ops->is_smm(ctxt))
2305
return emulate_ud(ctxt);
2306
2307
if (ctxt->ops->leave_smm(ctxt))
2308
ctxt->ops->triple_fault(ctxt);
2309
2310
return emulator_recalc_and_set_mode(ctxt);
2311
}
2312
2313
static void
2314
setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
2315
{
2316
cs->l = 0; /* will be adjusted later */
2317
set_desc_base(cs, 0); /* flat segment */
2318
cs->g = 1; /* 4kb granularity */
2319
set_desc_limit(cs, 0xfffff); /* 4GB limit */
2320
cs->type = 0x0b; /* Read, Execute, Accessed */
2321
cs->s = 1;
2322
cs->dpl = 0; /* will be adjusted later */
2323
cs->p = 1;
2324
cs->d = 1;
2325
cs->avl = 0;
2326
2327
set_desc_base(ss, 0); /* flat segment */
2328
set_desc_limit(ss, 0xfffff); /* 4GB limit */
2329
ss->g = 1; /* 4kb granularity */
2330
ss->s = 1;
2331
ss->type = 0x03; /* Read/Write, Accessed */
2332
ss->d = 1; /* 32bit stack segment */
2333
ss->dpl = 0;
2334
ss->p = 1;
2335
ss->l = 0;
2336
ss->avl = 0;
2337
}
2338
2339
static int em_syscall(struct x86_emulate_ctxt *ctxt)
2340
{
2341
const struct x86_emulate_ops *ops = ctxt->ops;
2342
struct desc_struct cs, ss;
2343
u64 msr_data;
2344
u16 cs_sel, ss_sel;
2345
u64 efer = 0;
2346
2347
/* syscall is not available in real mode */
2348
if (ctxt->mode == X86EMUL_MODE_REAL ||
2349
ctxt->mode == X86EMUL_MODE_VM86)
2350
return emulate_ud(ctxt);
2351
2352
/*
2353
* Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas
2354
* AMD allows SYSCALL in any flavor of protected mode. Note, it's
2355
* infeasible to emulate Intel behavior when running on AMD hardware,
2356
* as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD
2357
* for KVM to trap-and-emulate, unlike emulating AMD on Intel.
2358
*/
2359
if (ctxt->mode != X86EMUL_MODE_PROT64 &&
2360
ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
2361
return emulate_ud(ctxt);
2362
2363
ops->get_msr(ctxt, MSR_EFER, &efer);
2364
if (!(efer & EFER_SCE))
2365
return emulate_ud(ctxt);
2366
2367
setup_syscalls_segments(&cs, &ss);
2368
ops->get_msr(ctxt, MSR_STAR, &msr_data);
2369
msr_data >>= 32;
2370
cs_sel = (u16)(msr_data & 0xfffc);
2371
ss_sel = (u16)(msr_data + 8);
2372
2373
if (efer & EFER_LMA) {
2374
cs.d = 0;
2375
cs.l = 1;
2376
}
2377
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2378
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2379
2380
*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2381
if (efer & EFER_LMA) {
2382
#ifdef CONFIG_X86_64
2383
*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2384
2385
ops->get_msr(ctxt,
2386
ctxt->mode == X86EMUL_MODE_PROT64 ?
2387
MSR_LSTAR : MSR_CSTAR, &msr_data);
2388
ctxt->_eip = msr_data;
2389
2390
ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2391
ctxt->eflags &= ~msr_data;
2392
ctxt->eflags |= X86_EFLAGS_FIXED;
2393
#endif
2394
} else {
2395
/* legacy mode */
2396
ops->get_msr(ctxt, MSR_STAR, &msr_data);
2397
ctxt->_eip = (u32)msr_data;
2398
2399
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2400
}
2401
2402
ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2403
return X86EMUL_CONTINUE;
2404
}
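/*
 * MSR_STAR layout used above: bits 31:0 hold the legacy-mode SYSCALL
 * target EIP, bits 47:32 the kernel CS selector (SS is CS + 8), and
 * bits 63:48 the SYSRET CS base. In long mode the target RIP comes
 * from LSTAR (or CSTAR in compatibility mode) and MSR_SYSCALL_MASK
 * selects which EFLAGS bits are cleared on entry.
 */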
2405
2406
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2407
{
2408
const struct x86_emulate_ops *ops = ctxt->ops;
2409
struct desc_struct cs, ss;
2410
u64 msr_data;
2411
u16 cs_sel, ss_sel;
2412
u64 efer = 0;
2413
2414
ops->get_msr(ctxt, MSR_EFER, &efer);
2415
/* inject #GP if in real mode */
2416
if (ctxt->mode == X86EMUL_MODE_REAL)
2417
return emulate_gp(ctxt, 0);
2418
2419
/*
2420
* Intel's architecture allows SYSENTER in compatibility mode, but AMD
2421
* does not. Note, AMD does allow SYSENTER in legacy protected mode.
2422
*/
2423
if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) &&
2424
!ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
2425
return emulate_ud(ctxt);
2426
2427
/* sysenter/sysexit have not been tested in 64bit mode. */
2428
if (ctxt->mode == X86EMUL_MODE_PROT64)
2429
return X86EMUL_UNHANDLEABLE;
2430
2431
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2432
if ((msr_data & 0xfffc) == 0x0)
2433
return emulate_gp(ctxt, 0);
2434
2435
setup_syscalls_segments(&cs, &ss);
2436
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2437
cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2438
ss_sel = cs_sel + 8;
2439
if (efer & EFER_LMA) {
2440
cs.d = 0;
2441
cs.l = 1;
2442
}
2443
2444
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2445
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2446
2447
ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2448
ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2449
2450
ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2451
*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2452
(u32)msr_data;
2453
if (efer & EFER_LMA)
2454
ctxt->mode = X86EMUL_MODE_PROT64;
2455
2456
return X86EMUL_CONTINUE;
2457
}
2458
2459
static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2460
{
2461
const struct x86_emulate_ops *ops = ctxt->ops;
2462
struct desc_struct cs, ss;
2463
u64 msr_data, rcx, rdx;
2464
int usermode;
2465
u16 cs_sel = 0, ss_sel = 0;
2466
2467
/* inject #GP if in real mode or Virtual 8086 mode */
2468
if (ctxt->mode == X86EMUL_MODE_REAL ||
2469
ctxt->mode == X86EMUL_MODE_VM86)
2470
return emulate_gp(ctxt, 0);
2471
2472
setup_syscalls_segments(&cs, &ss);
2473
2474
if (ctxt->rex_bits & REX_W)
2475
usermode = X86EMUL_MODE_PROT64;
2476
else
2477
usermode = X86EMUL_MODE_PROT32;
2478
2479
rcx = reg_read(ctxt, VCPU_REGS_RCX);
2480
rdx = reg_read(ctxt, VCPU_REGS_RDX);
2481
2482
cs.dpl = 3;
2483
ss.dpl = 3;
2484
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2485
switch (usermode) {
2486
case X86EMUL_MODE_PROT32:
2487
cs_sel = (u16)(msr_data + 16);
2488
if ((msr_data & 0xfffc) == 0x0)
2489
return emulate_gp(ctxt, 0);
2490
ss_sel = (u16)(msr_data + 24);
2491
rcx = (u32)rcx;
2492
rdx = (u32)rdx;
2493
break;
2494
case X86EMUL_MODE_PROT64:
2495
cs_sel = (u16)(msr_data + 32);
2496
if (msr_data == 0x0)
2497
return emulate_gp(ctxt, 0);
2498
ss_sel = cs_sel + 8;
2499
cs.d = 0;
2500
cs.l = 1;
2501
if (emul_is_noncanonical_address(rcx, ctxt, 0) ||
2502
emul_is_noncanonical_address(rdx, ctxt, 0))
2503
return emulate_gp(ctxt, 0);
2504
break;
2505
}
2506
cs_sel |= SEGMENT_RPL_MASK;
2507
ss_sel |= SEGMENT_RPL_MASK;
2508
2509
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2510
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2511
2512
ctxt->_eip = rdx;
2513
ctxt->mode = usermode;
2514
*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2515
2516
return X86EMUL_CONTINUE;
2517
}
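/*
 * SYSEXIT derives its selectors from IA32_SYSENTER_CS as shown above:
 * the 32-bit return uses CS = base + 16 and SS = base + 24, the 64-bit
 * (REX.W) return uses CS = base + 32 and SS = base + 40, and both have
 * RPL forced to 3 via SEGMENT_RPL_MASK.
 */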
2518
2519
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2520
{
2521
int iopl;
2522
if (ctxt->mode == X86EMUL_MODE_REAL)
2523
return false;
2524
if (ctxt->mode == X86EMUL_MODE_VM86)
2525
return true;
2526
iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2527
return ctxt->ops->cpl(ctxt) > iopl;
2528
}
2529
2530
#define VMWARE_PORT_VMPORT (0x5658)
2531
#define VMWARE_PORT_VMRPC (0x5659)
2532
2533
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2534
u16 port, u16 len)
2535
{
2536
const struct x86_emulate_ops *ops = ctxt->ops;
2537
struct desc_struct tr_seg;
2538
u32 base3;
2539
int r;
2540
u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2541
unsigned mask = (1 << len) - 1;
2542
unsigned long base;
2543
2544
/*
2545
* VMware allows access to these ports even if denied
2546
* by the TSS I/O permission bitmap. Mimic that behavior.
2547
*/
2548
if (enable_vmware_backdoor &&
2549
((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2550
return true;
2551
2552
ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2553
if (!tr_seg.p)
2554
return false;
2555
if (desc_limit_scaled(&tr_seg) < 103)
2556
return false;
2557
base = get_desc_base(&tr_seg);
2558
#ifdef CONFIG_X86_64
2559
base |= ((u64)base3) << 32;
2560
#endif
2561
r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2562
if (r != X86EMUL_CONTINUE)
2563
return false;
2564
if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2565
return false;
2566
r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2567
if (r != X86EMUL_CONTINUE)
2568
return false;
2569
if ((perm >> bit_idx) & mask)
2570
return false;
2571
return true;
2572
}
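/*
 * Worked example for the I/O bitmap check: the 32-bit TSS stores the
 * bitmap offset in a 16-bit field at byte 102 and each port owns one
 * bit (clear = allowed). For port 0x3f8 with len = 1, the byte read is
 * at base + io_bitmap_ptr + 0x3f8/8 = +0x7f with bit_idx = 0 and
 * mask = 1; two bytes are fetched because an access of up to four
 * ports may straddle a byte boundary in the bitmap.
 */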
2573
2574
static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
2575
u16 port, u16 len)
2576
{
2577
if (ctxt->perm_ok)
2578
return true;
2579
2580
if (emulator_bad_iopl(ctxt))
2581
if (!emulator_io_port_access_allowed(ctxt, port, len))
2582
return false;
2583
2584
ctxt->perm_ok = true;
2585
2586
return true;
2587
}
2588
2589
static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2590
{
2591
/*
2592
* Intel CPUs mask the counter and pointers in quite a strange
2593
* manner when ECX is zero due to REP-string optimizations.
2594
*/
2595
#ifdef CONFIG_X86_64
2596
u32 eax, ebx, ecx, edx;
2597
2598
if (ctxt->ad_bytes != 4)
2599
return;
2600
2601
eax = ecx = 0;
2602
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2603
if (!is_guest_vendor_intel(ebx, ecx, edx))
2604
return;
2605
2606
*reg_write(ctxt, VCPU_REGS_RCX) = 0;
2607
2608
switch (ctxt->b) {
2609
case 0xa4: /* movsb */
2610
case 0xa5: /* movsd/w */
2611
*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2612
fallthrough;
2613
case 0xaa: /* stosb */
2614
case 0xab: /* stosd/w */
2615
*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2616
}
2617
#endif
2618
}
2619
2620
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2621
struct tss_segment_16 *tss)
2622
{
2623
tss->ip = ctxt->_eip;
2624
tss->flag = ctxt->eflags;
2625
tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2626
tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2627
tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2628
tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2629
tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2630
tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2631
tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2632
tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2633
2634
tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2635
tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2636
tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2637
tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2638
tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2639
}
2640
2641
static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2642
struct tss_segment_16 *tss)
2643
{
2644
int ret;
2645
u8 cpl;
2646
2647
ctxt->_eip = tss->ip;
2648
ctxt->eflags = tss->flag | 2;
2649
*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2650
*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2651
*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2652
*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2653
*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2654
*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2655
*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2656
*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2657
2658
/*
2659
* SDM says that segment selectors are loaded before segment
2660
* descriptors
2661
*/
2662
set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2663
set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2664
set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2665
set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2666
set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2667
2668
cpl = tss->cs & 3;
2669
2670
/*
2671
* Now load segment descriptors. If a fault happens at this stage,
2672
* it is handled in the context of the new task.
2673
*/
2674
ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2675
X86_TRANSFER_TASK_SWITCH, NULL);
2676
if (ret != X86EMUL_CONTINUE)
2677
return ret;
2678
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2679
X86_TRANSFER_TASK_SWITCH, NULL);
2680
if (ret != X86EMUL_CONTINUE)
2681
return ret;
2682
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2683
X86_TRANSFER_TASK_SWITCH, NULL);
2684
if (ret != X86EMUL_CONTINUE)
2685
return ret;
2686
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2687
X86_TRANSFER_TASK_SWITCH, NULL);
2688
if (ret != X86EMUL_CONTINUE)
2689
return ret;
2690
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2691
X86_TRANSFER_TASK_SWITCH, NULL);
2692
if (ret != X86EMUL_CONTINUE)
2693
return ret;
2694
2695
return X86EMUL_CONTINUE;
2696
}
2697
2698
static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2699
ulong old_tss_base, struct desc_struct *new_desc)
2700
{
2701
struct tss_segment_16 tss_seg;
2702
int ret;
2703
u32 new_tss_base = get_desc_base(new_desc);
2704
2705
ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2706
if (ret != X86EMUL_CONTINUE)
2707
return ret;
2708
2709
save_state_to_tss16(ctxt, &tss_seg);
2710
2711
ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2712
if (ret != X86EMUL_CONTINUE)
2713
return ret;
2714
2715
ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2716
if (ret != X86EMUL_CONTINUE)
2717
return ret;
2718
2719
if (old_tss_sel != 0xffff) {
2720
tss_seg.prev_task_link = old_tss_sel;
2721
2722
ret = linear_write_system(ctxt, new_tss_base,
2723
&tss_seg.prev_task_link,
2724
sizeof(tss_seg.prev_task_link));
2725
if (ret != X86EMUL_CONTINUE)
2726
return ret;
2727
}
2728
2729
return load_state_from_tss16(ctxt, &tss_seg);
2730
}
2731
2732
static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2733
struct tss_segment_32 *tss)
2734
{
2735
/* CR3 and the LDT selector are intentionally not saved */
2736
tss->eip = ctxt->_eip;
2737
tss->eflags = ctxt->eflags;
2738
tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2739
tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2740
tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2741
tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2742
tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2743
tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2744
tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2745
tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2746
2747
tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2748
tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2749
tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2750
tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2751
tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2752
tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2753
}
2754
2755
static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2756
struct tss_segment_32 *tss)
2757
{
2758
int ret;
2759
u8 cpl;
2760
2761
if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2762
return emulate_gp(ctxt, 0);
2763
ctxt->_eip = tss->eip;
2764
ctxt->eflags = tss->eflags | 2;
2765
2766
/* General purpose registers */
2767
*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2768
*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2769
*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2770
*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2771
*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2772
*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2773
*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2774
*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2775
2776
/*
2777
* SDM says that segment selectors are loaded before segment
2778
* descriptors. This is important because CPL checks will
2779
* use CS.RPL.
2780
*/
2781
set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2782
set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2783
set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2784
set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2785
set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2786
set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2787
set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2788
2789
/*
2790
* If we're switching between Protected Mode and VM86, we need to make
2791
* sure to update the mode before loading the segment descriptors so
2792
* that the selectors are interpreted correctly.
2793
*/
2794
if (ctxt->eflags & X86_EFLAGS_VM) {
2795
ctxt->mode = X86EMUL_MODE_VM86;
2796
cpl = 3;
2797
} else {
2798
ctxt->mode = X86EMUL_MODE_PROT32;
2799
cpl = tss->cs & 3;
2800
}
2801
2802
/*
2803
* Now load segment descriptors. If a fault happens at this stage,
2804
* it is handled in the context of the new task.
2805
*/
2806
ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2807
cpl, X86_TRANSFER_TASK_SWITCH, NULL);
2808
if (ret != X86EMUL_CONTINUE)
2809
return ret;
2810
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2811
X86_TRANSFER_TASK_SWITCH, NULL);
2812
if (ret != X86EMUL_CONTINUE)
2813
return ret;
2814
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2815
X86_TRANSFER_TASK_SWITCH, NULL);
2816
if (ret != X86EMUL_CONTINUE)
2817
return ret;
2818
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2819
X86_TRANSFER_TASK_SWITCH, NULL);
2820
if (ret != X86EMUL_CONTINUE)
2821
return ret;
2822
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2823
X86_TRANSFER_TASK_SWITCH, NULL);
2824
if (ret != X86EMUL_CONTINUE)
2825
return ret;
2826
ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2827
X86_TRANSFER_TASK_SWITCH, NULL);
2828
if (ret != X86EMUL_CONTINUE)
2829
return ret;
2830
ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2831
X86_TRANSFER_TASK_SWITCH, NULL);
2832
2833
return ret;
2834
}
2835
2836
static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2837
ulong old_tss_base, struct desc_struct *new_desc)
2838
{
2839
struct tss_segment_32 tss_seg;
2840
int ret;
2841
u32 new_tss_base = get_desc_base(new_desc);
2842
u32 eip_offset = offsetof(struct tss_segment_32, eip);
2843
u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2844
2845
ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2846
if (ret != X86EMUL_CONTINUE)
2847
return ret;
2848
2849
save_state_to_tss32(ctxt, &tss_seg);
2850
2851
/* Only GP registers and segment selectors are saved */
2852
ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2853
ldt_sel_offset - eip_offset);
2854
if (ret != X86EMUL_CONTINUE)
2855
return ret;
2856
2857
ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2858
if (ret != X86EMUL_CONTINUE)
2859
return ret;
2860
2861
if (old_tss_sel != 0xffff) {
2862
tss_seg.prev_task_link = old_tss_sel;
2863
2864
ret = linear_write_system(ctxt, new_tss_base,
2865
&tss_seg.prev_task_link,
2866
sizeof(tss_seg.prev_task_link));
2867
if (ret != X86EMUL_CONTINUE)
2868
return ret;
2869
}
2870
2871
return load_state_from_tss32(ctxt, &tss_seg);
2872
}
2873
2874
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2875
u16 tss_selector, int idt_index, int reason,
2876
bool has_error_code, u32 error_code)
2877
{
2878
const struct x86_emulate_ops *ops = ctxt->ops;
2879
struct desc_struct curr_tss_desc, next_tss_desc;
2880
int ret;
2881
u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2882
ulong old_tss_base =
2883
ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2884
u32 desc_limit;
2885
ulong desc_addr, dr7;
2886
2887
/* FIXME: old_tss_base == ~0 ? */
2888
2889
ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2890
if (ret != X86EMUL_CONTINUE)
2891
return ret;
2892
ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2893
if (ret != X86EMUL_CONTINUE)
2894
return ret;
2895
2896
/* FIXME: check that next_tss_desc is tss */
2897
2898
/*
2899
* Check privileges. The three cases are task switch caused by...
2900
*
2901
* 1. jmp/call/int to task gate: Check against DPL of the task gate
2902
* 2. Exception/IRQ/iret: No check is performed
2903
* 3. jmp/call to TSS/task-gate: No check is performed since the
2904
* hardware checks it before exiting.
2905
*/
2906
if (reason == TASK_SWITCH_GATE) {
2907
if (idt_index != -1) {
2908
/* Software interrupts */
2909
struct desc_struct task_gate_desc;
2910
int dpl;
2911
2912
ret = read_interrupt_descriptor(ctxt, idt_index,
2913
&task_gate_desc);
2914
if (ret != X86EMUL_CONTINUE)
2915
return ret;
2916
2917
dpl = task_gate_desc.dpl;
2918
if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2919
return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2920
}
2921
}
2922
2923
desc_limit = desc_limit_scaled(&next_tss_desc);
2924
if (!next_tss_desc.p ||
2925
((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2926
desc_limit < 0x2b)) {
2927
return emulate_ts(ctxt, tss_selector & 0xfffc);
2928
}
2929
2930
if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2931
curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2932
write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
2933
}
2934
2935
if (reason == TASK_SWITCH_IRET)
2936
ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2937
2938
/* Set the back link to the previous task only if the NT bit is set in eflags;
2939
note that old_tss_sel is not used after this point. */
2940
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2941
old_tss_sel = 0xffff;
2942
2943
if (next_tss_desc.type & 8)
2944
ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
2945
else
2946
ret = task_switch_16(ctxt, old_tss_sel,
2947
old_tss_base, &next_tss_desc);
2948
if (ret != X86EMUL_CONTINUE)
2949
return ret;
2950
2951
if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2952
ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2953
2954
if (reason != TASK_SWITCH_IRET) {
2955
next_tss_desc.type |= (1 << 1); /* set busy flag */
2956
write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
2957
}
2958
2959
ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
2960
ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
2961
2962
if (has_error_code) {
2963
ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2964
ctxt->lock_prefix = 0;
2965
ctxt->src.val = (unsigned long) error_code;
2966
ret = em_push(ctxt);
2967
}
2968
2969
dr7 = ops->get_dr(ctxt, 7);
2970
ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
2971
2972
return ret;
2973
}
2974
2975
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2976
u16 tss_selector, int idt_index, int reason,
2977
bool has_error_code, u32 error_code)
2978
{
2979
int rc;
2980
2981
invalidate_registers(ctxt);
2982
ctxt->_eip = ctxt->eip;
2983
ctxt->dst.type = OP_NONE;
2984
2985
rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
2986
has_error_code, error_code);
2987
2988
if (rc == X86EMUL_CONTINUE) {
2989
ctxt->eip = ctxt->_eip;
2990
writeback_registers(ctxt);
2991
}
2992
2993
return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
2994
}
2995
2996
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
2997
struct operand *op)
2998
{
2999
int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3000
3001
register_address_increment(ctxt, reg, df * op->bytes);
3002
op->addr.mem.ea = register_address(ctxt, reg);
3003
}
3004
3005
static int em_das(struct x86_emulate_ctxt *ctxt)
3006
{
3007
u8 al, old_al;
3008
bool af, cf, old_cf;
3009
3010
cf = ctxt->eflags & X86_EFLAGS_CF;
3011
al = ctxt->dst.val;
3012
3013
old_al = al;
3014
old_cf = cf;
3015
cf = false;
3016
af = ctxt->eflags & X86_EFLAGS_AF;
3017
if ((al & 0x0f) > 9 || af) {
3018
al -= 6;
3019
cf = old_cf | (al >= 250);
3020
af = true;
3021
} else {
3022
af = false;
3023
}
3024
if (old_al > 0x99 || old_cf) {
3025
al -= 0x60;
3026
cf = true;
3027
}
3028
3029
ctxt->dst.val = al;
3030
/* Set PF, ZF, SF */
3031
ctxt->src.type = OP_IMM;
3032
ctxt->src.val = 0;
3033
ctxt->src.bytes = 1;
3034
em_or(ctxt);
3035
ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3036
if (cf)
3037
ctxt->eflags |= X86_EFLAGS_CF;
3038
if (af)
3039
ctxt->eflags |= X86_EFLAGS_AF;
3040
return X86EMUL_CONTINUE;
3041
}
3042
3043
static int em_aam(struct x86_emulate_ctxt *ctxt)
3044
{
3045
u8 al, ah;
3046
3047
if (ctxt->src.val == 0)
3048
return emulate_de(ctxt);
3049
3050
al = ctxt->dst.val & 0xff;
3051
ah = al / ctxt->src.val;
3052
al %= ctxt->src.val;
3053
3054
ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3055
3056
/* Set PF, ZF, SF */
3057
ctxt->src.type = OP_IMM;
3058
ctxt->src.val = 0;
3059
ctxt->src.bytes = 1;
3060
em_or(ctxt);
3061
3062
return X86EMUL_CONTINUE;
3063
}
3064
3065
static int em_aad(struct x86_emulate_ctxt *ctxt)
3066
{
3067
u8 al = ctxt->dst.val & 0xff;
3068
u8 ah = (ctxt->dst.val >> 8) & 0xff;
3069
3070
al = (al + (ah * ctxt->src.val)) & 0xff;
3071
3072
ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3073
3074
/* Set PF, ZF, SF */
3075
ctxt->src.type = OP_IMM;
3076
ctxt->src.val = 0;
3077
ctxt->src.bytes = 1;
3078
em_or(ctxt);
3079
3080
return X86EMUL_CONTINUE;
3081
}
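/*
 * Example for the pair above: AAM with the default base 10 turns
 * AL = 42 into AH = 4, AL = 2; AAD reverses it, AL = 2 + 4 * 10 = 42
 * and AH = 0. Both update PF/ZF/SF from the result, which is what the
 * OR-with-immediate-zero trick accomplishes.
 */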
3082
3083
static int em_call(struct x86_emulate_ctxt *ctxt)
3084
{
3085
int rc;
3086
long rel = ctxt->src.val;
3087
3088
ctxt->src.val = (unsigned long)ctxt->_eip;
3089
rc = jmp_rel(ctxt, rel);
3090
if (rc != X86EMUL_CONTINUE)
3091
return rc;
3092
return em_push(ctxt);
3093
}
3094
3095
static int em_call_far(struct x86_emulate_ctxt *ctxt)
3096
{
3097
u16 sel, old_cs;
3098
ulong old_eip;
3099
int rc;
3100
struct desc_struct old_desc, new_desc;
3101
const struct x86_emulate_ops *ops = ctxt->ops;
3102
int cpl = ctxt->ops->cpl(ctxt);
3103
enum x86emul_mode prev_mode = ctxt->mode;
3104
3105
old_eip = ctxt->_eip;
3106
ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3107
3108
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3109
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3110
X86_TRANSFER_CALL_JMP, &new_desc);
3111
if (rc != X86EMUL_CONTINUE)
3112
return rc;
3113
3114
rc = assign_eip_far(ctxt, ctxt->src.val);
3115
if (rc != X86EMUL_CONTINUE)
3116
goto fail;
3117
3118
ctxt->src.val = old_cs;
3119
rc = em_push(ctxt);
3120
if (rc != X86EMUL_CONTINUE)
3121
goto fail;
3122
3123
ctxt->src.val = old_eip;
3124
rc = em_push(ctxt);
3125
/* If we failed, we tainted the memory, but at the very least we should
3126
restore cs */
3127
if (rc != X86EMUL_CONTINUE) {
3128
pr_warn_once("faulting far call emulation tainted memory\n");
3129
goto fail;
3130
}
3131
return rc;
3132
fail:
3133
ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3134
ctxt->mode = prev_mode;
3135
return rc;
3136
3137
}
3138
3139
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3140
{
3141
int rc;
3142
unsigned long eip = 0;
3143
3144
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3145
if (rc != X86EMUL_CONTINUE)
3146
return rc;
3147
rc = assign_eip_near(ctxt, eip);
3148
if (rc != X86EMUL_CONTINUE)
3149
return rc;
3150
rsp_increment(ctxt, ctxt->src.val);
3151
return X86EMUL_CONTINUE;
3152
}
3153
3154
static int em_xchg(struct x86_emulate_ctxt *ctxt)
3155
{
3156
/* Write back the register source. */
3157
ctxt->src.val = ctxt->dst.val;
3158
write_register_operand(&ctxt->src);
3159
3160
/* Write back the memory destination with implicit LOCK prefix. */
3161
ctxt->dst.val = ctxt->src.orig_val;
3162
ctxt->lock_prefix = 1;
3163
return X86EMUL_CONTINUE;
3164
}
3165
3166
static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3167
{
3168
ctxt->dst.val = ctxt->src2.val;
3169
return em_imul(ctxt);
3170
}
3171
3172
static int em_cwd(struct x86_emulate_ctxt *ctxt)
3173
{
3174
ctxt->dst.type = OP_REG;
3175
ctxt->dst.bytes = ctxt->src.bytes;
3176
ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3177
ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3178
3179
return X86EMUL_CONTINUE;
3180
}
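/*
 * The expression above is branchless sign propagation: shifting the
 * source's sign bit down yields 0 or 1, subtracting 1 yields all-ones
 * or 0, and the final ~ flips that, so CWD with AX = 0x8000 produces
 * DX = 0xffff while AX = 0x1234 produces DX = 0.
 */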
3181
3182
static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3183
{
3184
u64 tsc_aux = 0;
3185
3186
if (!ctxt->ops->guest_has_rdpid(ctxt))
3187
return emulate_ud(ctxt);
3188
3189
ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3190
ctxt->dst.val = tsc_aux;
3191
return X86EMUL_CONTINUE;
3192
}
3193
3194
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3195
{
3196
u64 tsc = 0;
3197
3198
ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3199
*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3200
*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3201
return X86EMUL_CONTINUE;
3202
}
3203
3204
static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3205
{
3206
u64 pmc;
3207
3208
if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3209
return emulate_gp(ctxt, 0);
3210
*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3211
*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3212
return X86EMUL_CONTINUE;
3213
}
3214
3215
static int em_mov(struct x86_emulate_ctxt *ctxt)
3216
{
3217
memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3218
return X86EMUL_CONTINUE;
3219
}
3220
3221
static int em_movbe(struct x86_emulate_ctxt *ctxt)
3222
{
3223
u16 tmp;
3224
3225
if (!ctxt->ops->guest_has_movbe(ctxt))
3226
return emulate_ud(ctxt);
3227
3228
switch (ctxt->op_bytes) {
3229
case 2:
3230
/*
3231
* From MOVBE definition: "...When the operand size is 16 bits,
3232
* the upper word of the destination register remains unchanged
3233
* ..."
3234
*
3235
* Both casting ->valptr and ->val to u16 breaks strict aliasing
3236
* rules, so we have to do the operation almost by hand.
3237
*/
3238
tmp = (u16)ctxt->src.val;
3239
ctxt->dst.val &= ~0xffffUL;
3240
ctxt->dst.val |= (unsigned long)swab16(tmp);
3241
break;
3242
case 4:
3243
ctxt->dst.val = swab32((u32)ctxt->src.val);
3244
break;
3245
case 8:
3246
ctxt->dst.val = swab64(ctxt->src.val);
3247
break;
3248
default:
3249
BUG();
3250
}
3251
return X86EMUL_CONTINUE;
3252
}
3253
3254
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3255
{
3256
int cr_num = ctxt->modrm_reg;
3257
int r;
3258
3259
if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3260
return emulate_gp(ctxt, 0);
3261
3262
/* Disable writeback. */
3263
ctxt->dst.type = OP_NONE;
3264
3265
if (cr_num == 0) {
3266
/*
3267
* CR0 write might have updated CR0.PE and/or CR0.PG
3268
* which can affect the cpu's execution mode.
3269
*/
3270
r = emulator_recalc_and_set_mode(ctxt);
3271
if (r != X86EMUL_CONTINUE)
3272
return r;
3273
}
3274
3275
return X86EMUL_CONTINUE;
3276
}
3277
3278
static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3279
{
3280
unsigned long val;
3281
3282
if (ctxt->mode == X86EMUL_MODE_PROT64)
3283
val = ctxt->src.val & ~0ULL;
3284
else
3285
val = ctxt->src.val & ~0U;
3286
3287
/* #UD condition is already handled. */
3288
if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3289
return emulate_gp(ctxt, 0);
3290
3291
/* Disable writeback. */
3292
ctxt->dst.type = OP_NONE;
3293
return X86EMUL_CONTINUE;
3294
}
3295
3296
static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3297
{
3298
u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3299
u64 msr_data;
3300
int r;
3301
3302
msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3303
| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3304
r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3305
3306
if (r == X86EMUL_PROPAGATE_FAULT)
3307
return emulate_gp(ctxt, 0);
3308
3309
return r;
3310
}
3311
3312
static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3313
{
3314
u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3315
u64 msr_data;
3316
int r;
3317
3318
r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3319
3320
if (r == X86EMUL_PROPAGATE_FAULT)
3321
return emulate_gp(ctxt, 0);
3322
3323
if (r == X86EMUL_CONTINUE) {
3324
*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3325
*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3326
}
3327
return r;
3328
}
3329
3330
static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3331
{
3332
if (segment > VCPU_SREG_GS &&
3333
(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3334
ctxt->ops->cpl(ctxt) > 0)
3335
return emulate_gp(ctxt, 0);
3336
3337
ctxt->dst.val = get_segment_selector(ctxt, segment);
3338
if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3339
ctxt->dst.bytes = 2;
3340
return X86EMUL_CONTINUE;
3341
}
3342
3343
static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3344
{
3345
if (ctxt->modrm_reg > VCPU_SREG_GS)
3346
return emulate_ud(ctxt);
3347
3348
return em_store_sreg(ctxt, ctxt->modrm_reg);
3349
}
3350
3351
static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3352
{
3353
u16 sel = ctxt->src.val;
3354
3355
if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3356
return emulate_ud(ctxt);
3357
3358
if (ctxt->modrm_reg == VCPU_SREG_SS)
3359
ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3360
3361
/* Disable writeback. */
3362
ctxt->dst.type = OP_NONE;
3363
return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3364
}
3365
3366
static int em_sldt(struct x86_emulate_ctxt *ctxt)
3367
{
3368
return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3369
}
3370
3371
static int em_lldt(struct x86_emulate_ctxt *ctxt)
3372
{
3373
u16 sel = ctxt->src.val;
3374
3375
/* Disable writeback. */
3376
ctxt->dst.type = OP_NONE;
3377
return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3378
}
3379
3380
static int em_str(struct x86_emulate_ctxt *ctxt)
3381
{
3382
return em_store_sreg(ctxt, VCPU_SREG_TR);
3383
}
3384
3385
static int em_ltr(struct x86_emulate_ctxt *ctxt)
3386
{
3387
u16 sel = ctxt->src.val;
3388
3389
/* Disable writeback. */
3390
ctxt->dst.type = OP_NONE;
3391
return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3392
}
3393
3394
static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3395
{
3396
int rc;
3397
ulong linear;
3398
unsigned int max_size;
3399
3400
rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
3401
&linear, X86EMUL_F_INVLPG);
3402
if (rc == X86EMUL_CONTINUE)
3403
ctxt->ops->invlpg(ctxt, linear);
3404
/* Disable writeback. */
3405
ctxt->dst.type = OP_NONE;
3406
return X86EMUL_CONTINUE;
3407
}
3408
3409
static int em_clts(struct x86_emulate_ctxt *ctxt)
3410
{
3411
ulong cr0;
3412
3413
cr0 = ctxt->ops->get_cr(ctxt, 0);
3414
cr0 &= ~X86_CR0_TS;
3415
ctxt->ops->set_cr(ctxt, 0, cr0);
3416
return X86EMUL_CONTINUE;
3417
}
3418
3419
static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3420
{
3421
int rc = ctxt->ops->fix_hypercall(ctxt);
3422
3423
if (rc != X86EMUL_CONTINUE)
3424
return rc;
3425
3426
/* Let the processor re-execute the fixed hypercall */
3427
ctxt->_eip = ctxt->eip;
3428
/* Disable writeback. */
3429
ctxt->dst.type = OP_NONE;
3430
return X86EMUL_CONTINUE;
3431
}
3432
3433
static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3434
void (*get)(struct x86_emulate_ctxt *ctxt,
3435
struct desc_ptr *ptr))
3436
{
3437
struct desc_ptr desc_ptr;
3438
3439
if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3440
ctxt->ops->cpl(ctxt) > 0)
3441
return emulate_gp(ctxt, 0);
3442
3443
if (ctxt->mode == X86EMUL_MODE_PROT64)
3444
ctxt->op_bytes = 8;
3445
get(ctxt, &desc_ptr);
3446
if (ctxt->op_bytes == 2) {
3447
ctxt->op_bytes = 4;
3448
desc_ptr.address &= 0x00ffffff;
3449
}
3450
/* Disable writeback. */
3451
ctxt->dst.type = OP_NONE;
3452
return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3453
&desc_ptr, 2 + ctxt->op_bytes);
3454
}
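/*
 * SGDT/SIDT store a pseudo-descriptor: a 2-byte limit followed by the
 * base (4 bytes, or 8 in 64-bit mode), hence the 2 + op_bytes write
 * size. With a 16-bit operand size only 24 bits of the base are
 * stored, which is what the 0x00ffffff mask above models.
 */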
3455
3456
static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3457
{
3458
return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3459
}
3460
3461
static int em_sidt(struct x86_emulate_ctxt *ctxt)
3462
{
3463
return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3464
}
3465
3466
static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3467
{
3468
struct desc_ptr desc_ptr;
3469
int rc;
3470
3471
if (ctxt->mode == X86EMUL_MODE_PROT64)
3472
ctxt->op_bytes = 8;
3473
rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3474
&desc_ptr.size, &desc_ptr.address,
3475
ctxt->op_bytes);
3476
if (rc != X86EMUL_CONTINUE)
3477
return rc;
3478
if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3479
emul_is_noncanonical_address(desc_ptr.address, ctxt,
3480
X86EMUL_F_DT_LOAD))
3481
return emulate_gp(ctxt, 0);
3482
if (lgdt)
3483
ctxt->ops->set_gdt(ctxt, &desc_ptr);
3484
else
3485
ctxt->ops->set_idt(ctxt, &desc_ptr);
3486
/* Disable writeback. */
3487
ctxt->dst.type = OP_NONE;
3488
return X86EMUL_CONTINUE;
3489
}
3490
3491
static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3492
{
3493
return em_lgdt_lidt(ctxt, true);
3494
}
3495
3496
static int em_lidt(struct x86_emulate_ctxt *ctxt)
3497
{
3498
return em_lgdt_lidt(ctxt, false);
3499
}
3500
3501
static int em_smsw(struct x86_emulate_ctxt *ctxt)
3502
{
3503
if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3504
ctxt->ops->cpl(ctxt) > 0)
3505
return emulate_gp(ctxt, 0);
3506
3507
if (ctxt->dst.type == OP_MEM)
3508
ctxt->dst.bytes = 2;
3509
ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3510
return X86EMUL_CONTINUE;
3511
}
3512
3513
static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3514
{
3515
ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3516
| (ctxt->src.val & 0x0f));
3517
ctxt->dst.type = OP_NONE;
3518
return X86EMUL_CONTINUE;
3519
}
3520
3521
static int em_loop(struct x86_emulate_ctxt *ctxt)
3522
{
3523
int rc = X86EMUL_CONTINUE;
3524
3525
register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3526
if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3527
(ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3528
rc = jmp_rel(ctxt, ctxt->src.val);
3529
3530
return rc;
3531
}
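/*
 * The opcodes reaching here are 0xe0 LOOPNE, 0xe1 LOOPE and 0xe2 LOOP;
 * all decrement (E)CX and branch while it is non-zero. XORing the
 * opcode with 0x5 maps 0xe1 to condition code 4 (ZF set, as in JE) and
 * 0xe0 to condition code 5 (ZF clear, as in JNE), while plain LOOP
 * short-circuits the flag test entirely.
 */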
3532
3533
static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3534
{
3535
int rc = X86EMUL_CONTINUE;
3536
3537
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3538
rc = jmp_rel(ctxt, ctxt->src.val);
3539
3540
return rc;
3541
}
3542
3543
static int em_in(struct x86_emulate_ctxt *ctxt)
3544
{
3545
if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3546
&ctxt->dst.val))
3547
return X86EMUL_IO_NEEDED;
3548
3549
return X86EMUL_CONTINUE;
3550
}
3551
3552
static int em_out(struct x86_emulate_ctxt *ctxt)
3553
{
3554
ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3555
&ctxt->src.val, 1);
3556
/* Disable writeback. */
3557
ctxt->dst.type = OP_NONE;
3558
return X86EMUL_CONTINUE;
3559
}
3560
3561
static int em_cli(struct x86_emulate_ctxt *ctxt)
3562
{
3563
if (emulator_bad_iopl(ctxt))
3564
return emulate_gp(ctxt, 0);
3565
3566
ctxt->eflags &= ~X86_EFLAGS_IF;
3567
return X86EMUL_CONTINUE;
3568
}
3569
3570
static int em_sti(struct x86_emulate_ctxt *ctxt)
3571
{
3572
if (emulator_bad_iopl(ctxt))
3573
return emulate_gp(ctxt, 0);
3574
3575
ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3576
ctxt->eflags |= X86_EFLAGS_IF;
3577
return X86EMUL_CONTINUE;
3578
}
3579
3580
static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3581
{
3582
u32 eax, ebx, ecx, edx;
3583
u64 msr = 0;
3584
3585
ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3586
if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3587
ctxt->ops->cpl(ctxt)) {
3588
return emulate_gp(ctxt, 0);
3589
}
3590
3591
eax = reg_read(ctxt, VCPU_REGS_RAX);
3592
ecx = reg_read(ctxt, VCPU_REGS_RCX);
3593
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3594
*reg_write(ctxt, VCPU_REGS_RAX) = eax;
3595
*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3596
*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3597
*reg_write(ctxt, VCPU_REGS_RDX) = edx;
3598
return X86EMUL_CONTINUE;
3599
}
3600
3601
static int em_sahf(struct x86_emulate_ctxt *ctxt)
3602
{
3603
u32 flags;
3604
3605
flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3606
X86_EFLAGS_SF;
3607
flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3608
3609
ctxt->eflags &= ~0xffUL;
3610
ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3611
return X86EMUL_CONTINUE;
3612
}
3613
3614
static int em_lahf(struct x86_emulate_ctxt *ctxt)
3615
{
3616
*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3617
*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3618
return X86EMUL_CONTINUE;
3619
}
3620
3621
static int em_bswap(struct x86_emulate_ctxt *ctxt)
3622
{
3623
switch (ctxt->op_bytes) {
3624
#ifdef CONFIG_X86_64
3625
case 8:
3626
asm("bswap %0" : "+r"(ctxt->dst.val));
3627
break;
3628
#endif
3629
default:
3630
asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3631
break;
3632
}
3633
return X86EMUL_CONTINUE;
3634
}
3635
3636
static int em_clflush(struct x86_emulate_ctxt *ctxt)
3637
{
3638
/* emulating clflush regardless of cpuid */
3639
return X86EMUL_CONTINUE;
3640
}
3641
3642
static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3643
{
3644
/* emulating clflushopt regardless of cpuid */
3645
return X86EMUL_CONTINUE;
3646
}
3647
3648
static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3649
{
3650
ctxt->dst.val = (s32) ctxt->src.val;
3651
return X86EMUL_CONTINUE;
3652
}
3653
3654
static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3655
{
3656
if (!ctxt->ops->guest_has_fxsr(ctxt))
3657
return emulate_ud(ctxt);
3658
3659
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3660
return emulate_nm(ctxt);
3661
3662
/*
3663
* Don't emulate a case that should never be hit, rather than working
3664
* around a lack of fxsave64/fxrstor64 on old compilers.
3665
*/
3666
if (ctxt->mode >= X86EMUL_MODE_PROT64)
3667
return X86EMUL_UNHANDLEABLE;
3668
3669
return X86EMUL_CONTINUE;
3670
}
3671
3672
/*
3673
* Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3674
* and restore MXCSR.
3675
*/
3676
static size_t __fxstate_size(int nregs)
3677
{
3678
return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3679
}
3680
3681
static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3682
{
3683
bool cr4_osfxsr;
3684
if (ctxt->mode == X86EMUL_MODE_PROT64)
3685
return __fxstate_size(16);
3686
3687
cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
3688
return __fxstate_size(cr4_osfxsr ? 8 : 0);
3689
}
3690
3691
/*
3692
* FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
3693
* 1) 16 bit mode
3694
* 2) 32 bit mode
3695
* - like (1), but FIP and FDP are only 16 bit. At least Intel CPUs
3696
* preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
3697
* save and restore
3698
* 3) 64-bit mode with REX.W prefix
3699
* - like (2), but XMM 8-15 are being saved and restored
3700
* 4) 64-bit mode without REX.W prefix
3701
* - like (3), but FIP and FDP are 64 bit
3702
*
3703
* Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3704
* desired result. (4) is not emulated.
3705
*
3706
* Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3707
* and FPU DS) should match.
3708
*/
3709
static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3710
{
3711
struct fxregs_state fx_state;
3712
int rc;
3713
3714
rc = check_fxsr(ctxt);
3715
if (rc != X86EMUL_CONTINUE)
3716
return rc;
3717
3718
kvm_fpu_get();
3719
3720
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3721
3722
kvm_fpu_put();
3723
3724
if (rc != X86EMUL_CONTINUE)
3725
return rc;
3726
3727
return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
3728
fxstate_size(ctxt));
3729
}
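/*
 * The FXSAVE image is 512 bytes with the XMM registers starting at
 * byte offset 160, 16 bytes each, so __fxstate_size(16) = 416,
 * __fxstate_size(8) = 288 and __fxstate_size(0) = 160; MXCSR lives in
 * the first 160 bytes and is therefore saved regardless of
 * CR4.OSFXSR.
 */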
3730
3731
/*
3732
* FXRSTOR might restore XMM registers not provided by the guest. Fill
3733
* in the host registers (via FXSAVE) instead, so they won't be modified.
3734
* (preemption has to stay disabled until FXRSTOR).
3735
*
3736
* Use noinline to keep the stack for other functions called by callers small.
3737
*/
3738
static noinline int fxregs_fixup(struct fxregs_state *fx_state,
3739
const size_t used_size)
3740
{
3741
struct fxregs_state fx_tmp;
3742
int rc;
3743
3744
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
3745
memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
3746
__fxstate_size(16) - used_size);
3747
3748
return rc;
3749
}
3750
3751
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3752
{
3753
struct fxregs_state fx_state;
3754
int rc;
3755
size_t size;
3756
3757
rc = check_fxsr(ctxt);
3758
if (rc != X86EMUL_CONTINUE)
3759
return rc;
3760
3761
size = fxstate_size(ctxt);
3762
rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3763
if (rc != X86EMUL_CONTINUE)
3764
return rc;
3765
3766
kvm_fpu_get();
3767
3768
if (size < __fxstate_size(16)) {
3769
rc = fxregs_fixup(&fx_state, size);
3770
if (rc != X86EMUL_CONTINUE)
3771
goto out;
3772
}
3773
3774
if (fx_state.mxcsr >> 16) {
3775
rc = emulate_gp(ctxt, 0);
3776
goto out;
3777
}
3778
3779
if (rc == X86EMUL_CONTINUE)
3780
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3781
3782
out:
3783
kvm_fpu_put();
3784
3785
return rc;
3786
}
3787
3788
static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
3789
{
3790
u32 eax, ecx, edx;
3791
3792
if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
3793
return emulate_ud(ctxt);
3794
3795
eax = reg_read(ctxt, VCPU_REGS_RAX);
3796
edx = reg_read(ctxt, VCPU_REGS_RDX);
3797
ecx = reg_read(ctxt, VCPU_REGS_RCX);
3798
3799
if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
3800
return emulate_gp(ctxt, 0);
3801
3802
return X86EMUL_CONTINUE;
3803
}
3804
3805
static bool valid_cr(int nr)
3806
{
3807
switch (nr) {
3808
case 0:
3809
case 2 ... 4:
3810
case 8:
3811
return true;
3812
default:
3813
return false;
3814
}
3815
}
3816
3817
static int check_cr_access(struct x86_emulate_ctxt *ctxt)
3818
{
3819
if (!valid_cr(ctxt->modrm_reg))
3820
return emulate_ud(ctxt);
3821
3822
return X86EMUL_CONTINUE;
3823
}
3824
3825
static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3826
{
3827
int dr = ctxt->modrm_reg;
3828
u64 cr4;
3829
3830
if (dr > 7)
3831
return emulate_ud(ctxt);
3832
3833
cr4 = ctxt->ops->get_cr(ctxt, 4);
3834
if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3835
return emulate_ud(ctxt);
3836
3837
if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) {
3838
ulong dr6;
3839
3840
dr6 = ctxt->ops->get_dr(ctxt, 6);
3841
dr6 &= ~DR_TRAP_BITS;
3842
dr6 |= DR6_BD | DR6_ACTIVE_LOW;
3843
ctxt->ops->set_dr(ctxt, 6, dr6);
3844
return emulate_db(ctxt);
3845
}
3846
3847
return X86EMUL_CONTINUE;
3848
}
3849
3850
static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3851
{
3852
u64 new_val = ctxt->src.val64;
3853
int dr = ctxt->modrm_reg;
3854
3855
if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3856
return emulate_gp(ctxt, 0);
3857
3858
return check_dr_read(ctxt);
3859
}
3860
3861
static int check_svme(struct x86_emulate_ctxt *ctxt)
3862
{
3863
u64 efer = 0;
3864
3865
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3866
3867
if (!(efer & EFER_SVME))
3868
return emulate_ud(ctxt);
3869
3870
return X86EMUL_CONTINUE;
3871
}
3872
3873
static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3874
{
3875
u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3876
3877
/* Valid physical address? */
3878
if (rax & 0xffff000000000000ULL)
3879
return emulate_gp(ctxt, 0);
3880
3881
return check_svme(ctxt);
3882
}
3883
3884
static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3885
{
3886
u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3887
3888
if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3889
return emulate_gp(ctxt, 0);
3890
3891
return X86EMUL_CONTINUE;
3892
}
3893
3894
static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3895
{
3896
u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3897
u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3898
3899
/*
3900
* VMware allows access to these pseudo-PMCs even when read via RDPMC
3901
* in Ring3 when CR4.PCE=0.
3902
*/
3903
if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
3904
return X86EMUL_CONTINUE;
3905
3906
/*
3907
* If CR4.PCE is clear, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
3908
* check however is unnecessary because CPL is always 0 outside
3909
* protected mode.
3910
*/
3911
if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3912
ctxt->ops->check_rdpmc_early(ctxt, rcx))
3913
return emulate_gp(ctxt, 0);
3914
3915
return X86EMUL_CONTINUE;
3916
}
3917
3918
static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3919
{
3920
ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3921
if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes))
3922
return emulate_gp(ctxt, 0);
3923
3924
return X86EMUL_CONTINUE;
3925
}
3926
3927
static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3928
{
3929
ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3930
if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes))
3931
return emulate_gp(ctxt, 0);
3932
3933
return X86EMUL_CONTINUE;
3934
}
3935
3936
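/*
 * Shorthand constructors for decode-table entries: D() sets flags only, I()
 * adds an execute callback, DI()/II() add an intercept, the *P variants add
 * a permission check, EXT/G/GD/ID/MD/E/GP select a sub-table, and the
 * 2bv/I6ALU helpers expand the byte and word/long variants of an opcode.
 */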
#define D(_y) { .flags = (_y) }
3937
#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
3938
#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
3939
.intercept = x86_intercept_##_i, .check_perm = (_p) }
3940
#define N D(NotImpl)
3941
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3942
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3943
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3944
#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
3945
#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
3946
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3947
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3948
#define II(_f, _e, _i) \
3949
{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
3950
#define IIP(_f, _e, _i, _p) \
3951
{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
3952
.intercept = x86_intercept_##_i, .check_perm = (_p) }
3953
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
3954
3955
#define D2bv(_f) D((_f) | ByteOp), D(_f)
3956
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
3957
#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
3958
#define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
3959
#define I2bvIP(_f, _e, _i, _p) \
3960
IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
3961
3962
#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \
3963
I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
3964
I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
3965
3966
static const struct opcode ud = I(SrcNone, emulate_ud);
3967
3968
static const struct opcode group7_rm0[] = {
3969
N,
3970
I(SrcNone | Priv | EmulateOnUD, em_hypercall),
3971
N, N, N, N, N, N,
3972
};
3973
3974
static const struct opcode group7_rm1[] = {
3975
DI(SrcNone | Priv, monitor),
3976
DI(SrcNone | Priv, mwait),
3977
N, N, N, N, N, N,
3978
};
3979
3980
static const struct opcode group7_rm2[] = {
3981
N,
3982
II(ImplicitOps | Priv, em_xsetbv, xsetbv),
3983
N, N, N, N, N, N,
3984
};
3985
3986
static const struct opcode group7_rm3[] = {
3987
DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
3988
II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
3989
DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
3990
DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
3991
DIP(SrcNone | Prot | Priv, stgi, check_svme),
3992
DIP(SrcNone | Prot | Priv, clgi, check_svme),
3993
DIP(SrcNone | Prot | Priv, skinit, check_svme),
3994
DIP(SrcNone | Prot | Priv, invlpga, check_svme),
3995
};
3996
3997
static const struct opcode group7_rm7[] = {
3998
N,
3999
DIP(SrcNone, rdtscp, check_rdtsc),
4000
N, N, N, N, N, N,
4001
};
4002
4003
static const struct opcode group1[] = {
4004
I(Lock, em_add),
4005
I(Lock | PageTable, em_or),
4006
I(Lock, em_adc),
4007
I(Lock, em_sbb),
4008
I(Lock | PageTable, em_and),
4009
I(Lock, em_sub),
4010
I(Lock, em_xor),
4011
I(NoWrite, em_cmp),
4012
};
4013
4014
static const struct opcode group1A[] = {
4015
I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4016
};
4017
4018
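/*
 * Grp2 rotates and shifts (ModRM /0 - /7); /6 is the undocumented alias of
 * SHL, hence the duplicated em_shl entry.
 */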
static const struct opcode group2[] = {
4019
I(DstMem | ModRM, em_rol),
4020
I(DstMem | ModRM, em_ror),
4021
I(DstMem | ModRM, em_rcl),
4022
I(DstMem | ModRM, em_rcr),
4023
I(DstMem | ModRM, em_shl),
4024
I(DstMem | ModRM, em_shr),
4025
I(DstMem | ModRM, em_shl),
4026
I(DstMem | ModRM, em_sar),
4027
};
4028
4029
static const struct opcode group3[] = {
4030
I(DstMem | SrcImm | NoWrite, em_test),
4031
I(DstMem | SrcImm | NoWrite, em_test),
4032
I(DstMem | SrcNone | Lock, em_not),
4033
I(DstMem | SrcNone | Lock, em_neg),
4034
I(DstXacc | Src2Mem, em_mul_ex),
4035
I(DstXacc | Src2Mem, em_imul_ex),
4036
I(DstXacc | Src2Mem, em_div_ex),
4037
I(DstXacc | Src2Mem, em_idiv_ex),
4038
};
4039
4040
static const struct opcode group4[] = {
4041
I(ByteOp | DstMem | SrcNone | Lock, em_inc),
4042
I(ByteOp | DstMem | SrcNone | Lock, em_dec),
4043
N, N, N, N, N, N,
4044
};
4045
4046
static const struct opcode group5[] = {
4047
I(DstMem | SrcNone | Lock, em_inc),
4048
I(DstMem | SrcNone | Lock, em_dec),
4049
I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs),
4050
I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far),
4051
I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
4052
I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
4053
I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4054
};
4055
4056
static const struct opcode group6[] = {
4057
II(Prot | DstMem, em_sldt, sldt),
4058
II(Prot | DstMem, em_str, str),
4059
II(Prot | Priv | SrcMem16, em_lldt, lldt),
4060
II(Prot | Priv | SrcMem16, em_ltr, ltr),
4061
N, N, N, N,
4062
};
4063
4064
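/*
 * Grp7: the mod != 3 column covers the descriptor-table and MSW
 * instructions; the mod == 3 column is further decoded by the RM field
 * (hypercall, MONITOR/MWAIT, XSETBV, the SVM instructions and RDTSCP).
 */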
static const struct group_dual group7 = { {
4065
II(Mov | DstMem, em_sgdt, sgdt),
4066
II(Mov | DstMem, em_sidt, sidt),
4067
II(SrcMem | Priv, em_lgdt, lgdt),
4068
II(SrcMem | Priv, em_lidt, lidt),
4069
II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4070
II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4071
II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4072
}, {
4073
EXT(0, group7_rm0),
4074
EXT(0, group7_rm1),
4075
EXT(0, group7_rm2),
4076
EXT(0, group7_rm3),
4077
II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4078
II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4079
EXT(0, group7_rm7),
4080
} };
4081
4082
static const struct opcode group8[] = {
4083
N, N, N, N,
4084
I(DstMem | SrcImmByte | NoWrite, em_bt),
4085
I(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4086
I(DstMem | SrcImmByte | Lock, em_btr),
4087
I(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4088
};
4089
4090
/*
4091
* The "memory" destination is actually always a register, since we come
4092
* from the register case of group9.
4093
*/
4094
static const struct gprefix pfx_0f_c7_7 = {
4095
N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4096
};
4097
4098
4099
static const struct group_dual group9 = { {
4100
N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4101
}, {
4102
N, N, N, N, N, N, N,
4103
GP(0, &pfx_0f_c7_7),
4104
} };
4105
4106
static const struct opcode group11[] = {
4107
I(DstMem | SrcImm | Mov | PageTable, em_mov),
4108
X7(D(Undefined)),
4109
};
4110
4111
static const struct gprefix pfx_0f_ae_7 = {
4112
I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4113
};
4114
4115
static const struct group_dual group15 = { {
4116
I(ModRM | Aligned16, em_fxsave),
4117
I(ModRM | Aligned16, em_fxrstor),
4118
N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4119
}, {
4120
N, N, N, N, N, N, N, N,
4121
} };
4122
4123
static const struct gprefix pfx_0f_6f_0f_7f = {
4124
I(Mmx, em_mov), I(Sse | Avx | Aligned, em_mov), N, I(Sse | Avx | Unaligned, em_mov),
4125
};
4126
4127
static const struct instr_dual instr_dual_0f_2b = {
4128
I(0, em_mov), N
4129
};
4130
4131
static const struct gprefix pfx_0f_2b = {
4132
ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4133
};
4134
4135
static const struct gprefix pfx_0f_10_0f_11 = {
4136
I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4137
};
4138
4139
static const struct gprefix pfx_0f_28_0f_29 = {
4140
I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4141
};
4142
4143
static const struct gprefix pfx_0f_e7_0f_38_2a = {
4144
N, I(Sse | Avx, em_mov), N, N,
4145
};
4146
4147
static const struct escape escape_d9 = { {
4148
N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4149
}, {
4150
/* 0xC0 - 0xC7 */
4151
N, N, N, N, N, N, N, N,
4152
/* 0xC8 - 0xCF */
4153
N, N, N, N, N, N, N, N,
4154
/* 0xD0 - 0xD7 */
4155
N, N, N, N, N, N, N, N,
4156
/* 0xD8 - 0xDF */
4157
N, N, N, N, N, N, N, N,
4158
/* 0xE0 - 0xE7 */
4159
N, N, N, N, N, N, N, N,
4160
/* 0xE8 - 0xEF */
4161
N, N, N, N, N, N, N, N,
4162
/* 0xF0 - 0xF7 */
4163
N, N, N, N, N, N, N, N,
4164
/* 0xF8 - 0xFF */
4165
N, N, N, N, N, N, N, N,
4166
} };
4167
4168
static const struct escape escape_db = { {
4169
N, N, N, N, N, N, N, N,
4170
}, {
4171
/* 0xC0 - 0xC7 */
4172
N, N, N, N, N, N, N, N,
4173
/* 0xC8 - 0xCF */
4174
N, N, N, N, N, N, N, N,
4175
/* 0xD0 - 0xD7 */
4176
N, N, N, N, N, N, N, N,
4177
/* 0xD8 - 0xDF */
4178
N, N, N, N, N, N, N, N,
4179
/* 0xE0 - 0xE7 */
4180
N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4181
/* 0xE8 - 0xEF */
4182
N, N, N, N, N, N, N, N,
4183
/* 0xF0 - 0xF7 */
4184
N, N, N, N, N, N, N, N,
4185
/* 0xF8 - 0xFF */
4186
N, N, N, N, N, N, N, N,
4187
} };
4188
4189
static const struct escape escape_dd = { {
4190
N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4191
}, {
4192
/* 0xC0 - 0xC7 */
4193
N, N, N, N, N, N, N, N,
4194
/* 0xC8 - 0xCF */
4195
N, N, N, N, N, N, N, N,
4196
/* 0xD0 - 0xC7 */
4197
N, N, N, N, N, N, N, N,
4198
/* 0xD8 - 0xDF */
4199
N, N, N, N, N, N, N, N,
4200
/* 0xE0 - 0xE7 */
4201
N, N, N, N, N, N, N, N,
4202
/* 0xE8 - 0xEF */
4203
N, N, N, N, N, N, N, N,
4204
/* 0xF0 - 0xF7 */
4205
N, N, N, N, N, N, N, N,
4206
/* 0xF8 - 0xFF */
4207
N, N, N, N, N, N, N, N,
4208
} };
4209
4210
static const struct instr_dual instr_dual_0f_c3 = {
4211
I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4212
};
4213
4214
static const struct mode_dual mode_dual_63 = {
4215
N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4216
};
4217
4218
static const struct instr_dual instr_dual_8d = {
4219
D(DstReg | SrcMem | ModRM | NoAccess), N
4220
};
4221
4222
static const struct opcode opcode_table[256] = {
4223
/* 0x00 - 0x07 */
4224
I6ALU(Lock, em_add),
4225
I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4226
I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4227
/* 0x08 - 0x0F */
4228
I6ALU(Lock | PageTable, em_or),
4229
I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4230
N,
4231
/* 0x10 - 0x17 */
4232
I6ALU(Lock, em_adc),
4233
I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4234
I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4235
/* 0x18 - 0x1F */
4236
I6ALU(Lock, em_sbb),
4237
I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4238
I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4239
/* 0x20 - 0x27 */
4240
I6ALU(Lock | PageTable, em_and), N, N,
4241
/* 0x28 - 0x2F */
4242
I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4243
/* 0x30 - 0x37 */
4244
I6ALU(Lock, em_xor), N, N,
4245
/* 0x38 - 0x3F */
4246
I6ALU(NoWrite, em_cmp), N, N,
4247
/* 0x40 - 0x4F */
4248
X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)),
4249
/* 0x50 - 0x57 */
4250
X8(I(SrcReg | Stack, em_push)),
4251
/* 0x58 - 0x5F */
4252
X8(I(DstReg | Stack, em_pop)),
4253
/* 0x60 - 0x67 */
4254
I(ImplicitOps | Stack | No64, em_pusha),
4255
I(ImplicitOps | Stack | No64, em_popa),
4256
N, MD(ModRM, &mode_dual_63),
4257
N, N, N, N,
4258
/* 0x68 - 0x6F */
4259
I(SrcImm | Mov | Stack, em_push),
4260
I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4261
I(SrcImmByte | Mov | Stack, em_push),
4262
I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4263
I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4264
I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4265
/* 0x70 - 0x7F */
4266
X16(D(SrcImmByte | NearBranch | IsBranch)),
4267
/* 0x80 - 0x87 */
4268
G(ByteOp | DstMem | SrcImm, group1),
4269
G(DstMem | SrcImm, group1),
4270
G(ByteOp | DstMem | SrcImm | No64, group1),
4271
G(DstMem | SrcImmByte, group1),
4272
I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4273
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4274
/* 0x88 - 0x8F */
4275
I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4276
I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4277
I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4278
ID(0, &instr_dual_8d),
4279
I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4280
G(0, group1A),
4281
/* 0x90 - 0x97 */
4282
DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4283
/* 0x98 - 0x9F */
4284
D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4285
I(SrcImmFAddr | No64 | IsBranch | ShadowStack, em_call_far), N,
4286
II(ImplicitOps | Stack, em_pushf, pushf),
4287
II(ImplicitOps | Stack, em_popf, popf),
4288
I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4289
/* 0xA0 - 0xA7 */
4290
I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4291
I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4292
I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4293
I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4294
/* 0xA8 - 0xAF */
4295
I2bv(DstAcc | SrcImm | NoWrite, em_test),
4296
I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4297
I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4298
I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4299
/* 0xB0 - 0xB7 */
4300
X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4301
/* 0xB8 - 0xBF */
4302
X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4303
/* 0xC0 - 0xC7 */
4304
G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4305
I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm),
4306
I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret),
4307
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4308
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4309
G(ByteOp, group11), G(0, group11),
4310
/* 0xC8 - 0xCF */
4311
I(Stack | SrcImmU16 | Src2ImmByte, em_enter),
4312
I(Stack, em_leave),
4313
I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
4314
I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
4315
D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
4316
D(ImplicitOps | No64 | IsBranch),
4317
II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
4318
/* 0xD0 - 0xD7 */
4319
G(Src2One | ByteOp, group2), G(Src2One, group2),
4320
G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4321
I(DstAcc | SrcImmUByte | No64, em_aam),
4322
I(DstAcc | SrcImmUByte | No64, em_aad),
4323
I(DstAcc | ByteOp | No64, em_salc),
4324
I(DstAcc | SrcXLat | ByteOp, em_mov),
4325
/* 0xD8 - 0xDF */
4326
N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4327
/* 0xE0 - 0xE7 */
4328
X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
4329
I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
4330
I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4331
I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4332
/* 0xE8 - 0xEF */
4333
I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
4334
D(SrcImm | ImplicitOps | NearBranch | IsBranch),
4335
I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
4336
D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
4337
I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4338
I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4339
/* 0xF0 - 0xF7 */
4340
N, DI(ImplicitOps, icebp), N, N,
4341
DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4342
G(ByteOp, group3), G(0, group3),
4343
/* 0xF8 - 0xFF */
4344
D(ImplicitOps), D(ImplicitOps),
4345
I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4346
D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4347
};
4348
4349
static const struct opcode twobyte_table[256] = {
4350
/* 0x00 - 0x0F */
4351
G(0, group6), GD(0, &group7), N, N,
4352
N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_syscall),
4353
II(ImplicitOps | Priv, em_clts, clts), N,
4354
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4355
N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4356
/* 0x10 - 0x1F */
4357
GP(ModRM | DstReg | SrcMem | Mov | Sse | Avx, &pfx_0f_10_0f_11),
4358
GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_10_0f_11),
4359
N, N, N, N, N, N,
4360
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4361
D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4362
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4363
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4364
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4365
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4366
/* 0x20 - 0x2F */
4367
DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4368
DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4369
IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4370
check_cr_access),
4371
IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4372
check_dr_write),
4373
N, N, N, N,
4374
GP(ModRM | DstReg | SrcMem | Mov | Sse | Avx, &pfx_0f_28_0f_29),
4375
GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_28_0f_29),
4376
N, GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_2b),
4377
N, N, N, N,
4378
/* 0x30 - 0x3F */
4379
II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4380
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4381
II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4382
IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4383
I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter),
4384
I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
4385
N, N,
4386
N, N, N, N, N, N, N, N,
4387
/* 0x40 - 0x4F */
4388
X16(D(DstReg | SrcMem | ModRM)),
4389
/* 0x50 - 0x5F */
4390
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4391
/* 0x60 - 0x6F */
4392
N, N, N, N,
4393
N, N, N, N,
4394
N, N, N, N,
4395
N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4396
/* 0x70 - 0x7F */
4397
N, N, N, N,
4398
N, N, N, N,
4399
N, N, N, N,
4400
N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4401
/* 0x80 - 0x8F */
4402
X16(D(SrcImm | NearBranch | IsBranch)),
4403
/* 0x90 - 0x9F */
4404
X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4405
/* 0xA0 - 0xA7 */
4406
I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4407
II(ImplicitOps, em_cpuid, cpuid),
4408
I(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4409
I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4410
I(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4411
/* 0xA8 - 0xAF */
4412
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4413
II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4414
I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4415
I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4416
I(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4417
GD(0, &group15), I(DstReg | SrcMem | ModRM, em_imul),
4418
/* 0xB0 - 0xB7 */
4419
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4420
I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4421
I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4422
I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4423
I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4424
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4425
/* 0xB8 - 0xBF */
4426
N, N,
4427
G(BitOp, group8),
4428
I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4429
I(DstReg | SrcMem | ModRM, em_bsf_c),
4430
I(DstReg | SrcMem | ModRM, em_bsr_c),
4431
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4432
/* 0xC0 - 0xC7 */
4433
I2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4434
N, ID(0, &instr_dual_0f_c3),
4435
N, N, N, GD(0, &group9),
4436
/* 0xC8 - 0xCF */
4437
X8(I(DstReg, em_bswap)),
4438
/* 0xD0 - 0xDF */
4439
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4440
/* 0xE0 - 0xEF */
4441
N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7_0f_38_2a),
4442
N, N, N, N, N, N, N, N,
4443
/* 0xF0 - 0xFF */
4444
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4445
};
4446
4447
static const struct instr_dual instr_dual_0f_38_f0 = {
4448
I(DstReg | SrcMem | Mov, em_movbe), N
4449
};
4450
4451
static const struct instr_dual instr_dual_0f_38_f1 = {
4452
I(DstMem | SrcReg | Mov, em_movbe), N
4453
};
4454
4455
static const struct gprefix three_byte_0f_38_f0 = {
4456
ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N
4457
};
4458
4459
static const struct gprefix three_byte_0f_38_f1 = {
4460
ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N
4461
};
4462
4463
/*
4464
* Instructions below are indexed by the third opcode byte; within each
* entry, the mandatory prefix selects the instruction.
4466
*/
4467
static const struct opcode opcode_map_0f_38[256] = {
4468
/* 0x00 - 0x1f */
4469
X16(N), X16(N),
4470
/* 0x20 - 0x2f */
4471
X8(N),
4472
X2(N), GP(SrcReg | DstMem | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
4473
/* 0x30 - 0x7f */
4474
X16(N), X16(N), X16(N), X16(N), X16(N),
4475
/* 0x80 - 0xef */
4476
X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4477
/* 0xf0 - 0xf1 */
4478
GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4479
GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4480
/* 0xf2 - 0xff */
4481
N, N, X4(N), X8(N)
4482
};
4483
4484
#undef D
4485
#undef N
4486
#undef G
4487
#undef GD
4488
#undef I
4489
#undef GP
4490
#undef EXT
4491
#undef MD
4492
#undef ID
4493
4494
#undef D2bv
4495
#undef D2bvIP
4496
#undef I2bv
4497
#undef I2bvIP
4498
#undef I6ALU
4499
4500
static bool is_shstk_instruction(struct x86_emulate_ctxt *ctxt)
4501
{
4502
return ctxt->d & ShadowStack;
4503
}
4504
4505
static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
4506
{
4507
u64 flags = ctxt->d;
4508
4509
if (!(flags & IsBranch))
4510
return false;
4511
4512
/*
4513
* All far JMPs and CALLs (including SYSCALL, SYSENTER, and INTn) are
4514
* indirect and thus affect IBT state. All far RETs (including SYSEXIT
4515
* and IRET) are protected via Shadow Stacks and thus don't affect IBT
4516
* state. IRET #GPs when returning to virtual-8086 and IBT or SHSTK is
4517
* enabled, but that should be handled by IRET emulation (in the very
4518
* unlikely scenario that KVM adds support for fully emulating IRET).
4519
*/
4520
if (!(flags & NearBranch))
4521
return ctxt->execute != em_iret &&
4522
ctxt->execute != em_ret_far &&
4523
ctxt->execute != em_ret_far_imm &&
4524
ctxt->execute != em_sysexit;
4525
4526
switch (flags & SrcMask) {
4527
case SrcReg:
4528
case SrcMem:
4529
case SrcMem16:
4530
case SrcMem32:
4531
return true;
4532
case SrcMemFAddr:
4533
case SrcImmFAddr:
4534
/* Far branches should be handled above. */
4535
WARN_ON_ONCE(1);
4536
return true;
4537
case SrcNone:
4538
case SrcImm:
4539
case SrcImmByte:
4540
/*
4541
* Note, ImmU16 is used only for the stack adjustment operand on ENTER
4542
* and RET instructions. ENTER isn't a branch and RET FAR is handled
4543
* by the NearBranch check above. RET itself isn't an indirect branch.
4544
*/
4545
case SrcImmU16:
4546
return false;
4547
default:
4548
WARN_ONCE(1, "Unexpected Src operand '%llx' on branch",
4549
flags & SrcMask);
4550
return false;
4551
}
4552
}
4553
4554
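/*
 * Immediates are at most four bytes even with a 64-bit operand size; the
 * only 8-byte immediate (MOV r64, imm64) is decoded via OpImm64 instead.
 */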
static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4555
{
4556
unsigned size;
4557
4558
size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4559
if (size == 8)
4560
size = 4;
4561
return size;
4562
}
4563
4564
static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4565
unsigned size, bool sign_extension)
4566
{
4567
int rc = X86EMUL_CONTINUE;
4568
4569
op->type = OP_IMM;
4570
op->bytes = size;
4571
op->addr.mem.ea = ctxt->_eip;
4572
/* NB. Immediates are sign-extended as necessary. */
4573
switch (op->bytes) {
4574
case 1:
4575
op->val = insn_fetch(s8, ctxt);
4576
break;
4577
case 2:
4578
op->val = insn_fetch(s16, ctxt);
4579
break;
4580
case 4:
4581
op->val = insn_fetch(s32, ctxt);
4582
break;
4583
case 8:
4584
op->val = insn_fetch(s64, ctxt);
4585
break;
4586
}
4587
if (!sign_extension) {
4588
switch (op->bytes) {
4589
case 1:
4590
op->val &= 0xff;
4591
break;
4592
case 2:
4593
op->val &= 0xffff;
4594
break;
4595
case 4:
4596
op->val &= 0xffffffff;
4597
break;
4598
}
4599
}
4600
done:
4601
return rc;
4602
}
4603
4604
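/*
 * Materialize one operand from its OpXxx encoding: register and immediate
 * operands are fetched here, memory operands reuse ctxt->memop as filled in
 * by ModRM/absolute decoding.
 */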
static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4605
unsigned d)
4606
{
4607
int rc = X86EMUL_CONTINUE;
4608
4609
switch (d) {
4610
case OpReg:
4611
decode_register_operand(ctxt, op);
4612
break;
4613
case OpImmUByte:
4614
rc = decode_imm(ctxt, op, 1, false);
4615
break;
4616
case OpMem:
4617
ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4618
mem_common:
4619
*op = ctxt->memop;
4620
ctxt->memopp = op;
4621
if (ctxt->d & BitOp)
4622
fetch_bit_operand(ctxt);
4623
op->orig_val = op->val;
4624
break;
4625
case OpMem64:
4626
ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4627
goto mem_common;
4628
case OpAcc:
4629
op->type = OP_REG;
4630
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4631
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4632
fetch_register_operand(op);
4633
break;
4634
case OpAccLo:
4635
op->type = OP_REG;
4636
op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4637
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4638
fetch_register_operand(op);
4639
break;
4640
case OpAccHi:
4641
if (ctxt->d & ByteOp) {
4642
op->type = OP_NONE;
4643
break;
4644
}
4645
op->type = OP_REG;
4646
op->bytes = ctxt->op_bytes;
4647
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4648
fetch_register_operand(op);
4649
break;
4650
case OpDI:
4651
op->type = OP_MEM;
4652
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4653
op->addr.mem.ea =
4654
register_address(ctxt, VCPU_REGS_RDI);
4655
op->addr.mem.seg = VCPU_SREG_ES;
4656
op->val = 0;
4657
op->count = 1;
4658
break;
4659
case OpDX:
4660
op->type = OP_REG;
4661
op->bytes = 2;
4662
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4663
fetch_register_operand(op);
4664
break;
4665
case OpCL:
4666
op->type = OP_IMM;
4667
op->bytes = 1;
4668
op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4669
break;
4670
case OpImmByte:
4671
rc = decode_imm(ctxt, op, 1, true);
4672
break;
4673
case OpOne:
4674
op->type = OP_IMM;
4675
op->bytes = 1;
4676
op->val = 1;
4677
break;
4678
case OpImm:
4679
rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4680
break;
4681
case OpImm64:
4682
rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4683
break;
4684
case OpMem8:
4685
ctxt->memop.bytes = 1;
4686
if (ctxt->memop.type == OP_REG) {
4687
ctxt->memop.addr.reg = decode_register(ctxt,
4688
ctxt->modrm_rm, true);
4689
fetch_register_operand(&ctxt->memop);
4690
}
4691
goto mem_common;
4692
case OpMem16:
4693
ctxt->memop.bytes = 2;
4694
goto mem_common;
4695
case OpMem32:
4696
ctxt->memop.bytes = 4;
4697
goto mem_common;
4698
case OpImmU16:
4699
rc = decode_imm(ctxt, op, 2, false);
4700
break;
4701
case OpImmU:
4702
rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4703
break;
4704
case OpSI:
4705
op->type = OP_MEM;
4706
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4707
op->addr.mem.ea =
4708
register_address(ctxt, VCPU_REGS_RSI);
4709
op->addr.mem.seg = ctxt->seg_override;
4710
op->val = 0;
4711
op->count = 1;
4712
break;
4713
case OpXLat:
4714
op->type = OP_MEM;
4715
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4716
op->addr.mem.ea =
4717
address_mask(ctxt,
4718
reg_read(ctxt, VCPU_REGS_RBX) +
4719
(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4720
op->addr.mem.seg = ctxt->seg_override;
4721
op->val = 0;
4722
break;
4723
case OpImmFAddr:
4724
op->type = OP_IMM;
4725
op->addr.mem.ea = ctxt->_eip;
4726
op->bytes = ctxt->op_bytes + 2;
4727
insn_fetch_arr(op->valptr, op->bytes, ctxt);
4728
break;
4729
case OpMemFAddr:
4730
ctxt->memop.bytes = ctxt->op_bytes + 2;
4731
goto mem_common;
4732
case OpES:
4733
op->type = OP_IMM;
4734
op->val = VCPU_SREG_ES;
4735
break;
4736
case OpCS:
4737
op->type = OP_IMM;
4738
op->val = VCPU_SREG_CS;
4739
break;
4740
case OpSS:
4741
op->type = OP_IMM;
4742
op->val = VCPU_SREG_SS;
4743
break;
4744
case OpDS:
4745
op->type = OP_IMM;
4746
op->val = VCPU_SREG_DS;
4747
break;
4748
case OpFS:
4749
op->type = OP_IMM;
4750
op->val = VCPU_SREG_FS;
4751
break;
4752
case OpGS:
4753
op->type = OP_IMM;
4754
op->val = VCPU_SREG_GS;
4755
break;
4756
case OpImplicit:
4757
/* Special instructions do their own operand decoding. */
4758
default:
4759
op->type = OP_NONE; /* Disable writeback. */
4760
break;
4761
}
4762
4763
done:
4764
return rc;
4765
}
4766
4767
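/*
 * Decode a VEX2 (0xc5) or VEX3 (0xc4) prefix into the equivalent REX bits,
 * opcode map, vector length and implied 66/F2/F3 prefix, then look up the
 * opcode in the matching two- or three-byte table.  Malformed or disallowed
 * encodings decode to the "ud" opcode; unsupported maps fail emulation.
 */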
static int x86_decode_avx(struct x86_emulate_ctxt *ctxt,
4768
u8 vex_1st, u8 vex_2nd, struct opcode *opcode)
4769
{
4770
u8 vex_3rd, map, pp, l, v;
4771
int rc = X86EMUL_CONTINUE;
4772
4773
if (ctxt->rep_prefix || ctxt->op_prefix || ctxt->rex_prefix)
4774
goto ud;
4775
4776
if (vex_1st == 0xc5) {
4777
/* Expand RVVVVlpp to VEX3 format */
4778
vex_3rd = vex_2nd & ~0x80; /* VVVVlpp from VEX2, w=0 */
4779
vex_2nd = (vex_2nd & 0x80) | 0x61; /* R from VEX2, X=1 B=1 mmmmm=00001 */
4780
} else {
4781
vex_3rd = insn_fetch(u8, ctxt);
4782
}
4783
4784
/* vex_2nd = RXBmmmmm, vex_3rd = wVVVVlpp. Fix polarity */
4785
vex_2nd ^= 0xE0; /* binary 11100000 */
4786
vex_3rd ^= 0x78; /* binary 01111000 */
4787
4788
ctxt->rex_prefix = REX_PREFIX;
4789
ctxt->rex_bits = (vex_2nd & 0xE0) >> 5; /* RXB */
4790
ctxt->rex_bits |= (vex_3rd & 0x80) >> 4; /* w */
4791
if (ctxt->rex_bits && ctxt->mode != X86EMUL_MODE_PROT64)
4792
goto ud;
4793
4794
map = vex_2nd & 0x1f;
4795
v = (vex_3rd >> 3) & 0xf;
4796
l = vex_3rd & 0x4;
4797
pp = vex_3rd & 0x3;
4798
4799
ctxt->b = insn_fetch(u8, ctxt);
4800
switch (map) {
4801
case 1:
4802
ctxt->opcode_len = 2;
4803
*opcode = twobyte_table[ctxt->b];
4804
break;
4805
case 2:
4806
ctxt->opcode_len = 3;
4807
*opcode = opcode_map_0f_38[ctxt->b];
4808
break;
4809
case 3:
4810
/* no 0f 3a instructions are supported yet */
4811
return X86EMUL_UNHANDLEABLE;
4812
default:
4813
goto ud;
4814
}
4815
4816
/*
4817
* No three-operand instructions are supported yet; those that *are*
* marked with the Avx flag treat the VVVV field as reserved (must be zero).
4819
*/
4820
if (v)
4821
goto ud;
4822
4823
if (l)
4824
ctxt->op_bytes = 32;
4825
else
4826
ctxt->op_bytes = 16;
4827
4828
switch (pp) {
4829
case 0: break;
4830
case 1: ctxt->op_prefix = true; break;
4831
case 2: ctxt->rep_prefix = 0xf3; break;
4832
case 3: ctxt->rep_prefix = 0xf2; break;
4833
}
4834
4835
done:
4836
return rc;
4837
ud:
4838
*opcode = ud;
4839
return rc;
4840
}
4841
4842
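/*
 * Top-level decode: consume legacy/REX/VEX prefixes, fetch the opcode
 * byte(s), walk the group/prefix/escape tables to the final opcode, apply
 * operand-size fixups and decode the source, second source and destination
 * operands.
 */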
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
4843
{
4844
int rc = X86EMUL_CONTINUE;
4845
int mode = ctxt->mode;
4846
int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4847
bool vex_prefix = false;
4848
bool has_seg_override = false;
4849
struct opcode opcode;
4850
u16 dummy;
4851
struct desc_struct desc;
4852
4853
ctxt->memop.type = OP_NONE;
4854
ctxt->memopp = NULL;
4855
ctxt->_eip = ctxt->eip;
4856
ctxt->fetch.ptr = ctxt->fetch.data;
4857
ctxt->fetch.end = ctxt->fetch.data + insn_len;
4858
ctxt->opcode_len = 1;
4859
ctxt->intercept = x86_intercept_none;
4860
if (insn_len > 0)
4861
memcpy(ctxt->fetch.data, insn, insn_len);
4862
else {
4863
rc = __do_insn_fetch_bytes(ctxt, 1);
4864
if (rc != X86EMUL_CONTINUE)
4865
goto done;
4866
}
4867
4868
switch (mode) {
4869
case X86EMUL_MODE_REAL:
4870
case X86EMUL_MODE_VM86:
4871
def_op_bytes = def_ad_bytes = 2;
4872
ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
4873
if (desc.d)
4874
def_op_bytes = def_ad_bytes = 4;
4875
break;
4876
case X86EMUL_MODE_PROT16:
4877
def_op_bytes = def_ad_bytes = 2;
4878
break;
4879
case X86EMUL_MODE_PROT32:
4880
def_op_bytes = def_ad_bytes = 4;
4881
break;
4882
#ifdef CONFIG_X86_64
4883
case X86EMUL_MODE_PROT64:
4884
def_op_bytes = 4;
4885
def_ad_bytes = 8;
4886
break;
4887
#endif
4888
default:
4889
return EMULATION_FAILED;
4890
}
4891
4892
ctxt->op_bytes = def_op_bytes;
4893
ctxt->ad_bytes = def_ad_bytes;
4894
4895
/* Legacy prefixes. */
4896
for (;;) {
4897
switch (ctxt->b = insn_fetch(u8, ctxt)) {
4898
case 0x66: /* operand-size override */
4899
ctxt->op_prefix = true;
4900
/* switch between 2/4 bytes */
4901
ctxt->op_bytes = def_op_bytes ^ 6;
4902
break;
4903
case 0x67: /* address-size override */
4904
if (mode == X86EMUL_MODE_PROT64)
4905
/* switch between 4/8 bytes */
4906
ctxt->ad_bytes = def_ad_bytes ^ 12;
4907
else
4908
/* switch between 2/4 bytes */
4909
ctxt->ad_bytes = def_ad_bytes ^ 6;
4910
break;
4911
case 0x26: /* ES override */
4912
has_seg_override = true;
4913
ctxt->seg_override = VCPU_SREG_ES;
4914
break;
4915
case 0x2e: /* CS override */
4916
has_seg_override = true;
4917
ctxt->seg_override = VCPU_SREG_CS;
4918
break;
4919
case 0x36: /* SS override */
4920
has_seg_override = true;
4921
ctxt->seg_override = VCPU_SREG_SS;
4922
break;
4923
case 0x3e: /* DS override */
4924
has_seg_override = true;
4925
ctxt->seg_override = VCPU_SREG_DS;
4926
break;
4927
case 0x64: /* FS override */
4928
has_seg_override = true;
4929
ctxt->seg_override = VCPU_SREG_FS;
4930
break;
4931
case 0x65: /* GS override */
4932
has_seg_override = true;
4933
ctxt->seg_override = VCPU_SREG_GS;
4934
break;
4935
case 0x40 ... 0x4f: /* REX */
4936
if (mode != X86EMUL_MODE_PROT64)
4937
goto done_prefixes;
4938
ctxt->rex_prefix = REX_PREFIX;
4939
ctxt->rex_bits = ctxt->b & 0xf;
4940
continue;
4941
case 0xf0: /* LOCK */
4942
ctxt->lock_prefix = 1;
4943
break;
4944
case 0xf2: /* REPNE/REPNZ */
4945
case 0xf3: /* REP/REPE/REPZ */
4946
ctxt->rep_prefix = ctxt->b;
4947
break;
4948
default:
4949
goto done_prefixes;
4950
}
4951
4952
/* Any legacy prefix after a REX prefix nullifies its effect. */
4953
ctxt->rex_prefix = REX_NONE;
4954
ctxt->rex_bits = 0;
4955
}
4956
4957
done_prefixes:
4958
4959
/* REX prefix. */
4960
if (ctxt->rex_bits & REX_W)
4961
ctxt->op_bytes = 8;
4962
4963
/* Opcode byte(s). */
4964
if (ctxt->b == 0xc4 || ctxt->b == 0xc5) {
4965
/* VEX or LDS/LES */
4966
u8 vex_2nd = insn_fetch(u8, ctxt);
4967
if (mode != X86EMUL_MODE_PROT64 && (vex_2nd & 0xc0) != 0xc0) {
4968
opcode = opcode_table[ctxt->b];
4969
ctxt->modrm = vex_2nd;
4970
/* the Mod/RM byte has been fetched already! */
4971
goto done_modrm;
4972
}
4973
4974
vex_prefix = true;
4975
rc = x86_decode_avx(ctxt, ctxt->b, vex_2nd, &opcode);
4976
if (rc != X86EMUL_CONTINUE)
4977
goto done;
4978
} else if (ctxt->b == 0x0f) {
4979
/* Two- or three-byte opcode */
4980
ctxt->opcode_len = 2;
4981
ctxt->b = insn_fetch(u8, ctxt);
4982
opcode = twobyte_table[ctxt->b];
4983
4984
/* 0F_38 opcode map */
4985
if (ctxt->b == 0x38) {
4986
ctxt->opcode_len = 3;
4987
ctxt->b = insn_fetch(u8, ctxt);
4988
opcode = opcode_map_0f_38[ctxt->b];
4989
}
4990
} else {
4991
/* Opcode byte(s). */
4992
opcode = opcode_table[ctxt->b];
4993
}
4994
4995
if (opcode.flags & ModRM)
4996
ctxt->modrm = insn_fetch(u8, ctxt);
4997
4998
done_modrm:
4999
ctxt->d = opcode.flags;
5000
while (ctxt->d & GroupMask) {
5001
switch (ctxt->d & GroupMask) {
5002
case Group:
5003
goffset = (ctxt->modrm >> 3) & 7;
5004
opcode = opcode.u.group[goffset];
5005
break;
5006
case GroupDual:
5007
goffset = (ctxt->modrm >> 3) & 7;
5008
if ((ctxt->modrm >> 6) == 3)
5009
opcode = opcode.u.gdual->mod3[goffset];
5010
else
5011
opcode = opcode.u.gdual->mod012[goffset];
5012
break;
5013
case RMExt:
5014
goffset = ctxt->modrm & 7;
5015
opcode = opcode.u.group[goffset];
5016
break;
5017
case Prefix:
5018
if (ctxt->rep_prefix && ctxt->op_prefix)
5019
return EMULATION_FAILED;
5020
simd_prefix = ctxt->op_prefix ? 0x66 : ctxt->rep_prefix;
5021
switch (simd_prefix) {
5022
case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
5023
case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
5024
case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
5025
case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
5026
}
5027
break;
5028
case Escape:
5029
if (ctxt->modrm > 0xbf) {
5030
size_t size = ARRAY_SIZE(opcode.u.esc->high);
5031
u32 index = array_index_nospec(
5032
ctxt->modrm - 0xc0, size);
5033
5034
opcode = opcode.u.esc->high[index];
5035
} else {
5036
opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5037
}
5038
break;
5039
case InstrDual:
5040
if ((ctxt->modrm >> 6) == 3)
5041
opcode = opcode.u.idual->mod3;
5042
else
5043
opcode = opcode.u.idual->mod012;
5044
break;
5045
case ModeDual:
5046
if (ctxt->mode == X86EMUL_MODE_PROT64)
5047
opcode = opcode.u.mdual->mode64;
5048
else
5049
opcode = opcode.u.mdual->mode32;
5050
break;
5051
default:
5052
return EMULATION_FAILED;
5053
}
5054
5055
ctxt->d &= ~(u64)GroupMask;
5056
ctxt->d |= opcode.flags;
5057
}
5058
5059
ctxt->is_branch = opcode.flags & IsBranch;
5060
5061
/* Unrecognised? */
5062
if (ctxt->d == 0)
5063
return EMULATION_FAILED;
5064
5065
if (unlikely(vex_prefix)) {
5066
/*
5067
* Only specifically marked instructions support VEX. Since many
5068
* instructions support it but are not annotated, return not implemented
5069
* rather than #UD.
5070
*/
5071
if (!(ctxt->d & Avx))
5072
return EMULATION_FAILED;
5073
5074
if (!(ctxt->d & AlignMask))
5075
ctxt->d |= Unaligned;
5076
}
5077
5078
ctxt->execute = opcode.u.execute;
5079
5080
/*
5081
* Reject emulation if KVM might need to emulate shadow stack updates
5082
* and/or indirect branch tracking enforcement, which the emulator
5083
* doesn't support.
5084
*/
5085
if ((is_ibt_instruction(ctxt) || is_shstk_instruction(ctxt)) &&
5086
ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
5087
u64 u_cet = 0, s_cet = 0;
5088
5089
/*
5090
* Check both User and Supervisor on far transfers as inter-
5091
* privilege level transfers are impacted by CET at the target
5092
* privilege level, and that is not known at this time. The
5093
* expectation is that the guest will not require emulation of
5094
* any CET-affected instructions at any privilege level.
5095
*/
5096
if (!(ctxt->d & NearBranch))
5097
u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5098
else if (ctxt->ops->cpl(ctxt) == 3)
5099
u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5100
else
5101
s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5102
5103
if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
5104
(s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
5105
return EMULATION_FAILED;
5106
5107
if ((u_cet | s_cet) & CET_SHSTK_EN && is_shstk_instruction(ctxt))
5108
return EMULATION_FAILED;
5109
5110
if ((u_cet | s_cet) & CET_ENDBR_EN && is_ibt_instruction(ctxt))
5111
return EMULATION_FAILED;
5112
}
5113
5114
if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
5115
likely(!(ctxt->d & EmulateOnUD)))
5116
return EMULATION_FAILED;
5117
5118
if (unlikely(ctxt->d &
5119
(NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5120
No16))) {
5121
/*
5122
* These are copied unconditionally here, and checked unconditionally
5123
* in x86_emulate_insn.
5124
*/
5125
ctxt->check_perm = opcode.check_perm;
5126
ctxt->intercept = opcode.intercept;
5127
5128
if (ctxt->d & NotImpl)
5129
return EMULATION_FAILED;
5130
5131
if (mode == X86EMUL_MODE_PROT64) {
5132
if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5133
ctxt->op_bytes = 8;
5134
else if (ctxt->d & NearBranch)
5135
ctxt->op_bytes = 8;
5136
}
5137
5138
if (ctxt->d & Op3264) {
5139
if (mode == X86EMUL_MODE_PROT64)
5140
ctxt->op_bytes = 8;
5141
else
5142
ctxt->op_bytes = 4;
5143
}
5144
5145
if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5146
ctxt->op_bytes = 4;
5147
5148
if (vex_prefix)
5149
;
5150
else if (ctxt->d & Sse)
5151
ctxt->op_bytes = 16, ctxt->d &= ~Avx;
5152
else if (ctxt->d & Mmx)
5153
ctxt->op_bytes = 8;
5154
}
5155
5156
/* ModRM and SIB bytes. */
5157
if (ctxt->d & ModRM) {
5158
rc = decode_modrm(ctxt, &ctxt->memop);
5159
if (!has_seg_override) {
5160
has_seg_override = true;
5161
ctxt->seg_override = ctxt->modrm_seg;
5162
}
5163
} else if (ctxt->d & MemAbs)
5164
rc = decode_abs(ctxt, &ctxt->memop);
5165
if (rc != X86EMUL_CONTINUE)
5166
goto done;
5167
5168
if (!has_seg_override)
5169
ctxt->seg_override = VCPU_SREG_DS;
5170
5171
ctxt->memop.addr.mem.seg = ctxt->seg_override;
5172
5173
/*
5174
* Decode and fetch the source operand: register, memory
5175
* or immediate.
5176
*/
5177
rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5178
if (rc != X86EMUL_CONTINUE)
5179
goto done;
5180
5181
/*
5182
* Decode and fetch the second source operand: register, memory
5183
* or immediate.
5184
*/
5185
rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5186
if (rc != X86EMUL_CONTINUE)
5187
goto done;
5188
5189
/* Decode and fetch the destination operand: register or memory. */
5190
rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5191
5192
if (ctxt->rip_relative && likely(ctxt->memopp))
5193
ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5194
ctxt->memopp->addr.mem.ea + ctxt->_eip);
5195
5196
done:
5197
if (rc == X86EMUL_PROPAGATE_FAULT)
5198
ctxt->have_exception = true;
5199
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5200
}
5201
5202
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5203
{
5204
return ctxt->d & PageTable;
5205
}
5206
5207
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5208
{
5209
/* The second termination condition only applies to REPE and REPNE.
* If the repeat-string prefix is REPE/REPZ or REPNE/REPNZ, check the
* corresponding termination condition:
5213
* - if REPE/REPZ and ZF = 0 then done
5214
* - if REPNE/REPNZ and ZF = 1 then done
5215
*/
5216
if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5217
(ctxt->b == 0xae) || (ctxt->b == 0xaf))
5218
&& (((ctxt->rep_prefix == REPE_PREFIX) &&
5219
((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5220
|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
5221
((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5222
return true;
5223
5224
return false;
5225
}
5226
5227
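/*
 * Execute FWAIT under kvm_fpu_get()/kvm_fpu_put() so that any pending x87
 * exception is raised as #MF now, before MMX operands are read.
 */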
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5228
{
5229
int rc;
5230
5231
kvm_fpu_get();
5232
rc = asm_safe("fwait");
5233
kvm_fpu_put();
5234
5235
if (unlikely(rc != X86EMUL_CONTINUE))
5236
return emulate_exception(ctxt, MF_VECTOR, 0, false);
5237
5238
return X86EMUL_CONTINUE;
5239
}
5240
5241
static void fetch_possible_mmx_operand(struct operand *op)
5242
{
5243
if (op->type == OP_MM)
5244
kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
5245
}
5246
5247
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5248
{
5249
/* Clear fields that are set conditionally but read without a guard. */
5250
ctxt->rip_relative = false;
5251
ctxt->rex_prefix = REX_NONE;
5252
ctxt->rex_bits = 0;
5253
ctxt->lock_prefix = 0;
5254
ctxt->op_prefix = false;
5255
ctxt->rep_prefix = 0;
5256
ctxt->regs_valid = 0;
5257
ctxt->regs_dirty = 0;
5258
5259
ctxt->io_read.pos = 0;
5260
ctxt->io_read.end = 0;
5261
ctxt->mem_read.end = 0;
5262
}
5263
5264
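/*
 * Execute a previously decoded instruction: perform mode/CPL/LOCK and
 * intercept checks, read the memory operands, dispatch to the ->execute
 * callback or to the opcode switches below, write back the results and
 * handle REP-string restarts.
 */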
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
5265
{
5266
const struct x86_emulate_ops *ops = ctxt->ops;
5267
int rc = X86EMUL_CONTINUE;
5268
int saved_dst_type = ctxt->dst.type;
5269
5270
ctxt->mem_read.pos = 0;
5271
5272
/* LOCK prefix is allowed only with some instructions */
5273
if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5274
rc = emulate_ud(ctxt);
5275
goto done;
5276
}
5277
5278
if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5279
rc = emulate_ud(ctxt);
5280
goto done;
5281
}
5282
5283
if (unlikely(ctxt->d &
5284
(No64|Undefined|Avx|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5285
if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5286
(ctxt->d & Undefined)) {
5287
rc = emulate_ud(ctxt);
5288
goto done;
5289
}
5290
5291
if ((ctxt->d & (Avx|Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) {
5292
rc = emulate_ud(ctxt);
5293
goto done;
5294
}
5295
5296
if (ctxt->d & Avx) {
5297
u64 xcr = 0;
5298
if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE)
5299
|| ops->get_xcr(ctxt, 0, &xcr)
5300
|| !(xcr & XFEATURE_MASK_YMM)) {
5301
rc = emulate_ud(ctxt);
5302
goto done;
5303
}
5304
} else if (ctxt->d & Sse) {
5305
if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) {
5306
rc = emulate_ud(ctxt);
5307
goto done;
5308
}
5309
}
5310
5311
if ((ctxt->d & (Avx|Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5312
rc = emulate_nm(ctxt);
5313
goto done;
5314
}
5315
5316
if (ctxt->d & Mmx) {
5317
rc = flush_pending_x87_faults(ctxt);
5318
if (rc != X86EMUL_CONTINUE)
5319
goto done;
5320
/*
5321
* Now that we know the fpu is exception safe, we can fetch
5322
* operands from it.
5323
*/
5324
fetch_possible_mmx_operand(&ctxt->src);
5325
fetch_possible_mmx_operand(&ctxt->src2);
5326
if (!(ctxt->d & Mov))
5327
fetch_possible_mmx_operand(&ctxt->dst);
5328
}
5329
5330
if (unlikely(check_intercepts) && ctxt->intercept) {
5331
rc = emulator_check_intercept(ctxt, ctxt->intercept,
5332
X86_ICPT_PRE_EXCEPT);
5333
if (rc != X86EMUL_CONTINUE)
5334
goto done;
5335
}
5336
5337
/* Instruction can only be executed in protected mode */
5338
if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5339
rc = emulate_ud(ctxt);
5340
goto done;
5341
}
5342
5343
/* Privileged instruction can be executed only in CPL=0 */
5344
if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5345
if (ctxt->d & PrivUD)
5346
rc = emulate_ud(ctxt);
5347
else
5348
rc = emulate_gp(ctxt, 0);
5349
goto done;
5350
}
5351
5352
/* Do instruction specific permission checks */
5353
if (ctxt->d & CheckPerm) {
5354
rc = ctxt->check_perm(ctxt);
5355
if (rc != X86EMUL_CONTINUE)
5356
goto done;
5357
}
5358
5359
if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
5360
rc = emulator_check_intercept(ctxt, ctxt->intercept,
5361
X86_ICPT_POST_EXCEPT);
5362
if (rc != X86EMUL_CONTINUE)
5363
goto done;
5364
}
5365
5366
if (ctxt->rep_prefix && (ctxt->d & String)) {
5367
/* All REP prefixes have the same first termination condition */
5368
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5369
string_registers_quirk(ctxt);
5370
ctxt->eip = ctxt->_eip;
5371
ctxt->eflags &= ~X86_EFLAGS_RF;
5372
goto done;
5373
}
5374
}
5375
}
5376
5377
if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5378
rc = segmented_read(ctxt, ctxt->src.addr.mem,
5379
ctxt->src.valptr, ctxt->src.bytes);
5380
if (rc != X86EMUL_CONTINUE)
5381
goto done;
5382
ctxt->src.orig_val64 = ctxt->src.val64;
5383
}
5384
5385
if (ctxt->src2.type == OP_MEM) {
5386
rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5387
&ctxt->src2.val, ctxt->src2.bytes);
5388
if (rc != X86EMUL_CONTINUE)
5389
goto done;
5390
}
5391
5392
if ((ctxt->d & DstMask) == ImplicitOps)
5393
goto special_insn;
5394
5395
5396
if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5397
/* optimisation - avoid slow emulated read if Mov */
5398
rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5399
&ctxt->dst.val, ctxt->dst.bytes);
5400
if (rc != X86EMUL_CONTINUE) {
5401
if (!(ctxt->d & NoWrite) &&
5402
rc == X86EMUL_PROPAGATE_FAULT &&
5403
ctxt->exception.vector == PF_VECTOR)
5404
ctxt->exception.error_code |= PFERR_WRITE_MASK;
5405
goto done;
5406
}
5407
}
5408
/* Copy full 64-bit value for CMPXCHG8B. */
5409
ctxt->dst.orig_val64 = ctxt->dst.val64;
5410
5411
special_insn:
5412
5413
if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
5414
rc = emulator_check_intercept(ctxt, ctxt->intercept,
5415
X86_ICPT_POST_MEMACCESS);
5416
if (rc != X86EMUL_CONTINUE)
5417
goto done;
5418
}
5419
5420
if (ctxt->rep_prefix && (ctxt->d & String))
5421
ctxt->eflags |= X86_EFLAGS_RF;
5422
else
5423
ctxt->eflags &= ~X86_EFLAGS_RF;
5424
5425
if (ctxt->execute) {
5426
rc = ctxt->execute(ctxt);
5427
if (rc != X86EMUL_CONTINUE)
5428
goto done;
5429
goto writeback;
5430
}
5431
5432
if (ctxt->opcode_len == 2)
5433
goto twobyte_insn;
5434
else if (ctxt->opcode_len == 3)
5435
goto threebyte_insn;
5436
5437
switch (ctxt->b) {
5438
case 0x70 ... 0x7f: /* jcc (short) */
5439
if (test_cc(ctxt->b, ctxt->eflags))
5440
rc = jmp_rel(ctxt, ctxt->src.val);
5441
break;
5442
case 0x8d: /* lea r16/r32, m */
5443
ctxt->dst.val = ctxt->src.addr.mem.ea;
5444
break;
5445
case 0x90 ... 0x97: /* nop / xchg reg, rax */
5446
if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5447
ctxt->dst.type = OP_NONE;
5448
else
5449
rc = em_xchg(ctxt);
5450
break;
5451
case 0x98: /* cbw/cwde/cdqe */
5452
switch (ctxt->op_bytes) {
5453
case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5454
case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5455
case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
5456
}
5457
break;
5458
case 0xcc: /* int3 */
5459
rc = emulate_int(ctxt, 3);
5460
break;
5461
case 0xcd: /* int n */
5462
rc = emulate_int(ctxt, ctxt->src.val);
5463
break;
5464
case 0xce: /* into */
5465
if (ctxt->eflags & X86_EFLAGS_OF)
5466
rc = emulate_int(ctxt, 4);
5467
break;
5468
case 0xe9: /* jmp rel */
5469
case 0xeb: /* jmp rel short */
5470
rc = jmp_rel(ctxt, ctxt->src.val);
5471
ctxt->dst.type = OP_NONE; /* Disable writeback. */
5472
break;
5473
case 0xf4: /* hlt */
5474
ctxt->ops->halt(ctxt);
5475
break;
5476
case 0xf5: /* cmc */
5477
/* complement carry flag from eflags reg */
5478
ctxt->eflags ^= X86_EFLAGS_CF;
5479
break;
5480
case 0xf8: /* clc */
5481
ctxt->eflags &= ~X86_EFLAGS_CF;
5482
break;
5483
case 0xf9: /* stc */
5484
ctxt->eflags |= X86_EFLAGS_CF;
5485
break;
5486
case 0xfc: /* cld */
5487
ctxt->eflags &= ~X86_EFLAGS_DF;
5488
break;
5489
case 0xfd: /* std */
5490
ctxt->eflags |= X86_EFLAGS_DF;
5491
break;
5492
default:
5493
goto cannot_emulate;
5494
}
5495
5496
if (rc != X86EMUL_CONTINUE)
5497
goto done;
5498
5499
writeback:
5500
if (ctxt->d & SrcWrite) {
5501
BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5502
rc = writeback(ctxt, &ctxt->src);
5503
if (rc != X86EMUL_CONTINUE)
5504
goto done;
5505
}
5506
if (!(ctxt->d & NoWrite)) {
5507
rc = writeback(ctxt, &ctxt->dst);
5508
if (rc != X86EMUL_CONTINUE)
5509
goto done;
5510
}
5511
5512
/*
5513
* Restore dst type in case the decoding is reused
* (happens for string instructions).
5515
*/
5516
ctxt->dst.type = saved_dst_type;
5517
5518
if ((ctxt->d & SrcMask) == SrcSI)
5519
string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5520
5521
if ((ctxt->d & DstMask) == DstDI)
5522
string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
5523
5524
if (ctxt->rep_prefix && (ctxt->d & String)) {
5525
unsigned int count;
5526
struct read_cache *r = &ctxt->io_read;
5527
if ((ctxt->d & SrcMask) == SrcSI)
5528
count = ctxt->src.count;
5529
else
5530
count = ctxt->dst.count;
5531
register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5532
5533
if (!string_insn_completed(ctxt)) {
5534
/*
5535
* Re-enter the guest when the PIO read-ahead buffer is empty
* or, if it is not used, after every 1024 iterations.
5537
*/
5538
if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5539
(r->end == 0 || r->end != r->pos)) {
5540
/*
5541
* Reset read cache. Usually happens before
5542
* decode, but since instruction is restarted
5543
* we have to do it here.
5544
*/
5545
ctxt->mem_read.end = 0;
5546
writeback_registers(ctxt);
5547
return EMULATION_RESTART;
5548
}
5549
goto done; /* skip rip writeback */
5550
}
5551
ctxt->eflags &= ~X86_EFLAGS_RF;
5552
}
5553
5554
ctxt->eip = ctxt->_eip;
5555
if (ctxt->mode != X86EMUL_MODE_PROT64)
5556
ctxt->eip = (u32)ctxt->_eip;
5557
5558
done:
5559
if (rc == X86EMUL_PROPAGATE_FAULT) {
5560
if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
5561
return EMULATION_FAILED;
5562
ctxt->have_exception = true;
5563
}
5564
if (rc == X86EMUL_INTERCEPTED)
5565
return EMULATION_INTERCEPTED;
5566
5567
if (rc == X86EMUL_CONTINUE)
5568
writeback_registers(ctxt);
5569
5570
return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
5571
5572
twobyte_insn:
5573
switch (ctxt->b) {
5574
case 0x09: /* wbinvd */
5575
(ctxt->ops->wbinvd)(ctxt);
5576
break;
5577
case 0x08: /* invd */
5578
case 0x0d: /* GrpP (prefetch) */
5579
case 0x18: /* Grp16 (prefetch/nop) */
5580
case 0x1f: /* nop */
5581
break;
5582
case 0x20: /* mov cr, reg */
5583
ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5584
break;
5585
case 0x21: /* mov from dr to reg */
5586
ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg);
5587
break;
5588
case 0x40 ... 0x4f: /* cmov */
5589
if (test_cc(ctxt->b, ctxt->eflags))
5590
ctxt->dst.val = ctxt->src.val;
5591
else if (ctxt->op_bytes != 4)
5592
ctxt->dst.type = OP_NONE; /* no writeback */
5593
break;
5594
case 0x80 ... 0x8f: /* jnz rel, etc*/
5595
if (test_cc(ctxt->b, ctxt->eflags))
5596
rc = jmp_rel(ctxt, ctxt->src.val);
5597
break;
5598
case 0x90 ... 0x9f: /* setcc r/m8 */
5599
ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5600
break;
5601
case 0xb6 ... 0xb7: /* movzx */
5602
ctxt->dst.bytes = ctxt->op_bytes;
5603
ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5604
: (u16) ctxt->src.val;
5605
break;
5606
case 0xbe ... 0xbf: /* movsx */
5607
ctxt->dst.bytes = ctxt->op_bytes;
5608
ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5609
(s16) ctxt->src.val;
5610
break;
5611
default:
5612
goto cannot_emulate;
5613
}
5614
5615
threebyte_insn:
5616
5617
if (rc != X86EMUL_CONTINUE)
5618
goto done;
5619
5620
goto writeback;
5621
5622
cannot_emulate:
5623
return EMULATION_FAILED;
5624
}
5625
5626
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5627
{
5628
invalidate_registers(ctxt);
5629
}
5630
5631
void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5632
{
5633
writeback_registers(ctxt);
5634
}
5635
5636
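/*
 * A cached GPA can only be reused if the instruction accesses a single
 * memory operand exactly once; REP-string and two-memory-operand
 * instructions don't qualify.
 */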
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
5637
{
5638
if (ctxt->rep_prefix && (ctxt->d & String))
5639
return false;
5640
5641
if (ctxt->d & TwoMemOp)
5642
return false;
5643
5644
return true;
5645
}
5646
5647