GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/amd64/vmm/vmm_instruction_emul.c
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2012 Sandvine, Inc.
5
* Copyright (c) 2012 NetApp, Inc.
6
* All rights reserved.
7
*
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
10
* are met:
11
* 1. Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
16
*
17
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
* SUCH DAMAGE.
28
*/
29
30
#include <sys/cdefs.h>
31
#ifdef _KERNEL
32
#include <sys/param.h>
33
#include <sys/pcpu.h>
34
#include <sys/systm.h>
35
#include <sys/proc.h>
36
37
#include <vm/vm.h>
38
#include <vm/pmap.h>
39
40
#include <machine/vmparam.h>
41
#include <machine/vmm.h>
42
43
#include <dev/vmm/vmm_mem.h>
44
#else /* !_KERNEL */
45
#include <sys/types.h>
46
#include <sys/errno.h>
47
#include <sys/_iovec.h>
48
49
#include <machine/vmm.h>
50
51
#include <err.h>
52
#include <assert.h>
53
#include <stdbool.h>
54
#include <stddef.h>
55
#include <stdio.h>
56
#include <string.h>
57
#include <strings.h>
58
#include <vmmapi.h>
59
#define __diagused
60
#define KASSERT(exp,msg) assert((exp))
61
#define panic(...) errx(4, __VA_ARGS__)
62
#endif /* _KERNEL */
63
64
#include <machine/vmm_instruction_emul.h>
65
#include <x86/psl.h>
66
#include <x86/specialreg.h>
67
68
/* struct vie_op.op_flags */
69
#define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */
70
#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
71
#define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */
72
#define VIE_OP_F_NO_MODRM (1 << 3)
73
#define VIE_OP_F_NO_GLA_VERIFICATION (1 << 4)
74
75
static const struct vie_op three_byte_opcodes_0f38[256] = {
76
[0xF7] = {
77
.op_byte = 0xF7,
78
.op_type = VIE_OP_TYPE_BEXTR,
79
},
80
};
81
82
static const struct vie_op two_byte_opcodes[256] = {
83
[0xAE] = {
84
.op_byte = 0xAE,
85
.op_type = VIE_OP_TYPE_TWOB_GRP15,
86
},
87
[0xB6] = {
88
.op_byte = 0xB6,
89
.op_type = VIE_OP_TYPE_MOVZX,
90
},
91
[0xB7] = {
92
.op_byte = 0xB7,
93
.op_type = VIE_OP_TYPE_MOVZX,
94
},
95
[0xBA] = {
96
.op_byte = 0xBA,
97
.op_type = VIE_OP_TYPE_BITTEST,
98
.op_flags = VIE_OP_F_IMM8,
99
},
100
[0xBE] = {
101
.op_byte = 0xBE,
102
.op_type = VIE_OP_TYPE_MOVSX,
103
},
104
};
105
106
static const struct vie_op one_byte_opcodes[256] = {
107
[0x03] = {
108
.op_byte = 0x03,
109
.op_type = VIE_OP_TYPE_ADD,
110
},
111
[0x0F] = {
112
.op_byte = 0x0F,
113
.op_type = VIE_OP_TYPE_TWO_BYTE
114
},
115
[0x0B] = {
116
.op_byte = 0x0B,
117
.op_type = VIE_OP_TYPE_OR,
118
},
119
[0x2B] = {
120
.op_byte = 0x2B,
121
.op_type = VIE_OP_TYPE_SUB,
122
},
123
[0x39] = {
124
.op_byte = 0x39,
125
.op_type = VIE_OP_TYPE_CMP,
126
},
127
[0x3B] = {
128
.op_byte = 0x3B,
129
.op_type = VIE_OP_TYPE_CMP,
130
},
131
[0x6E] = {
132
.op_byte = 0x6E,
133
.op_type = VIE_OP_TYPE_OUTS,
134
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION,
135
},
136
[0x6F] = {
137
.op_byte = 0x6F,
138
.op_type = VIE_OP_TYPE_OUTS,
139
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION,
140
},
141
[0x88] = {
142
.op_byte = 0x88,
143
.op_type = VIE_OP_TYPE_MOV,
144
},
145
[0x89] = {
146
.op_byte = 0x89,
147
.op_type = VIE_OP_TYPE_MOV,
148
},
149
[0x8A] = {
150
.op_byte = 0x8A,
151
.op_type = VIE_OP_TYPE_MOV,
152
},
153
[0x8B] = {
154
.op_byte = 0x8B,
155
.op_type = VIE_OP_TYPE_MOV,
156
},
157
[0xA1] = {
158
.op_byte = 0xA1,
159
.op_type = VIE_OP_TYPE_MOV,
160
.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
161
},
162
[0xA3] = {
163
.op_byte = 0xA3,
164
.op_type = VIE_OP_TYPE_MOV,
165
.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
166
},
167
[0xA4] = {
168
.op_byte = 0xA4,
169
.op_type = VIE_OP_TYPE_MOVS,
170
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
171
},
172
[0xA5] = {
173
.op_byte = 0xA5,
174
.op_type = VIE_OP_TYPE_MOVS,
175
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
176
},
177
[0xAA] = {
178
.op_byte = 0xAA,
179
.op_type = VIE_OP_TYPE_STOS,
180
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
181
},
182
[0xAB] = {
183
.op_byte = 0xAB,
184
.op_type = VIE_OP_TYPE_STOS,
185
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
186
},
187
[0xC6] = {
188
/* XXX Group 11 extended opcode - not just MOV */
189
.op_byte = 0xC6,
190
.op_type = VIE_OP_TYPE_MOV,
191
.op_flags = VIE_OP_F_IMM8,
192
},
193
[0xC7] = {
194
.op_byte = 0xC7,
195
.op_type = VIE_OP_TYPE_MOV,
196
.op_flags = VIE_OP_F_IMM,
197
},
198
[0x23] = {
199
.op_byte = 0x23,
200
.op_type = VIE_OP_TYPE_AND,
201
},
202
[0x80] = {
203
/* Group 1 extended opcode */
204
.op_byte = 0x80,
205
.op_type = VIE_OP_TYPE_GROUP1,
206
.op_flags = VIE_OP_F_IMM8,
207
},
208
[0x81] = {
209
/* Group 1 extended opcode */
210
.op_byte = 0x81,
211
.op_type = VIE_OP_TYPE_GROUP1,
212
.op_flags = VIE_OP_F_IMM,
213
},
214
[0x83] = {
215
/* Group 1 extended opcode */
216
.op_byte = 0x83,
217
.op_type = VIE_OP_TYPE_GROUP1,
218
.op_flags = VIE_OP_F_IMM8,
219
},
220
[0x8F] = {
221
/* XXX Group 1A extended opcode - not just POP */
222
.op_byte = 0x8F,
223
.op_type = VIE_OP_TYPE_POP,
224
},
225
[0xF6] = {
226
/* XXX Group 3 extended opcode - not just TEST */
227
.op_byte = 0xF6,
228
.op_type = VIE_OP_TYPE_TEST,
229
.op_flags = VIE_OP_F_IMM8,
230
},
231
[0xF7] = {
232
/* XXX Group 3 extended opcode - not just TEST */
233
.op_byte = 0xF7,
234
.op_type = VIE_OP_TYPE_TEST,
235
.op_flags = VIE_OP_F_IMM,
236
},
237
[0xFF] = {
238
/* XXX Group 5 extended opcode - not just PUSH */
239
.op_byte = 0xFF,
240
.op_type = VIE_OP_TYPE_PUSH,
241
}
242
};
243
244
/* struct vie.mod */
245
#define VIE_MOD_INDIRECT 0
246
#define VIE_MOD_INDIRECT_DISP8 1
247
#define VIE_MOD_INDIRECT_DISP32 2
248
#define VIE_MOD_DIRECT 3
249
250
/* struct vie.rm */
251
#define VIE_RM_SIB 4
252
#define VIE_RM_DISP32 5
253
254
#define GB (1024 * 1024 * 1024)
255
256
static enum vm_reg_name gpr_map[16] = {
257
VM_REG_GUEST_RAX,
258
VM_REG_GUEST_RCX,
259
VM_REG_GUEST_RDX,
260
VM_REG_GUEST_RBX,
261
VM_REG_GUEST_RSP,
262
VM_REG_GUEST_RBP,
263
VM_REG_GUEST_RSI,
264
VM_REG_GUEST_RDI,
265
VM_REG_GUEST_R8,
266
VM_REG_GUEST_R9,
267
VM_REG_GUEST_R10,
268
VM_REG_GUEST_R11,
269
VM_REG_GUEST_R12,
270
VM_REG_GUEST_R13,
271
VM_REG_GUEST_R14,
272
VM_REG_GUEST_R15
273
};
274
275
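/* Indexed by operand size in bytes; only entries 1, 2, 4 and 8 are used. */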
static uint64_t size2mask[] = {
276
[1] = 0xff,
277
[2] = 0xffff,
278
[4] = 0xffffffff,
279
[8] = 0xffffffffffffffff,
280
};
281
282
static int
283
vie_read_register(struct vcpu *vcpu, enum vm_reg_name reg, uint64_t *rval)
284
{
285
int error;
286
287
error = vm_get_register(vcpu, reg, rval);
288
289
return (error);
290
}
291
292
static void
293
vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
294
{
295
*lhbr = 0;
296
*reg = gpr_map[vie->reg];
297
298
/*
299
* 64-bit mode imposes limitations on accessing legacy high byte
300
* registers (lhbr).
301
*
302
* The legacy high-byte registers cannot be addressed if the REX
303
* prefix is present. In this case the values 4, 5, 6 and 7 of the
304
* 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
305
*
306
* If the REX prefix is not present then the values 4, 5, 6 and 7
307
* of the 'ModRM:reg' field address the legacy high-byte registers,
308
* %ah, %ch, %dh and %bh respectively.
309
*/
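/*
* For example, with ModRM:reg = 4: without a REX prefix this selects
* %ah (lhbr = 1, base register %rax); with any REX prefix it selects
* %spl (lhbr = 0, base register %rsp).
*/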
310
if (!vie->rex_present) {
311
if (vie->reg & 0x4) {
312
*lhbr = 1;
313
*reg = gpr_map[vie->reg & 0x3];
314
}
315
}
316
}
317
318
static int
319
vie_read_bytereg(struct vcpu *vcpu, struct vie *vie, uint8_t *rval)
320
{
321
uint64_t val;
322
int error, lhbr;
323
enum vm_reg_name reg;
324
325
vie_calc_bytereg(vie, &reg, &lhbr);
326
error = vm_get_register(vcpu, reg, &val);
327
328
/*
329
* To obtain the value of a legacy high byte register shift the
330
* base register right by 8 bits (%ah = %rax >> 8).
331
*/
332
if (lhbr)
333
*rval = val >> 8;
334
else
335
*rval = val;
336
return (error);
337
}
338
339
static int
340
vie_write_bytereg(struct vcpu *vcpu, struct vie *vie, uint8_t byte)
341
{
342
uint64_t origval, val, mask;
343
int error, lhbr;
344
enum vm_reg_name reg;
345
346
vie_calc_bytereg(vie, &reg, &lhbr);
347
error = vm_get_register(vcpu, reg, &origval);
348
if (error == 0) {
349
val = byte;
350
mask = 0xff;
351
if (lhbr) {
352
/*
353
* Shift left by 8 to store 'byte' in a legacy high
354
* byte register.
355
*/
356
val <<= 8;
357
mask <<= 8;
358
}
359
val |= origval & ~mask;
360
error = vm_set_register(vcpu, reg, val);
361
}
362
return (error);
363
}
364
365
int
366
vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg,
367
uint64_t val, int size)
368
{
369
int error;
370
uint64_t origval;
371
372
switch (size) {
373
case 1:
374
case 2:
375
error = vie_read_register(vcpu, reg, &origval);
376
if (error)
377
return (error);
378
val &= size2mask[size];
379
val |= origval & ~size2mask[size];
380
break;
381
case 4:
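/*
* In 64-bit mode a write to a 32-bit register destination zero-extends
* into the upper 32 bits, so the original value is deliberately not
* merged back in (unlike the 1- and 2-byte cases above).
*/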
382
val &= 0xffffffffUL;
383
break;
384
case 8:
385
break;
386
default:
387
return (EINVAL);
388
}
389
390
error = vm_set_register(vcpu, reg, val);
391
return (error);
392
}
393
394
#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
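/* i.e. the arithmetic status flags: CF, PF, AF, ZF, SF and OF */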
395
396
/*
397
* Return the status flags that would result from doing (x - y).
398
*/
399
#define GETCC(sz) \
400
static u_long \
401
getcc##sz(uint##sz##_t x, uint##sz##_t y) \
402
{ \
403
u_long rflags; \
404
\
405
__asm __volatile("sub %2,%1; pushfq; popq %0" : \
406
"=r" (rflags), "+r" (x) : "m" (y)); \
407
return (rflags); \
408
} struct __hack
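/*
* The trailing 'struct __hack' consumes the semicolon written after each
* macro invocation (e.g. "GETCC(8);"), keeping the expansion a valid
* file-scope declaration.
*/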
409
410
GETCC(8);
411
GETCC(16);
412
GETCC(32);
413
GETCC(64);
414
415
static u_long
416
getcc(int opsize, uint64_t x, uint64_t y)
417
{
418
KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
419
("getcc: invalid operand size %d", opsize));
420
421
if (opsize == 1)
422
return (getcc8(x, y));
423
else if (opsize == 2)
424
return (getcc16(x, y));
425
else if (opsize == 4)
426
return (getcc32(x, y));
427
else
428
return (getcc64(x, y));
429
}
430
431
/*
432
* Macro creation of functions getaddflags{8,16,32,64}
433
*/
434
#define GETADDFLAGS(sz) \
435
static u_long \
436
getaddflags##sz(uint##sz##_t x, uint##sz##_t y) \
437
{ \
438
u_long rflags; \
439
\
440
__asm __volatile("add %2,%1; pushfq; popq %0" : \
441
"=r" (rflags), "+r" (x) : "m" (y)); \
442
return (rflags); \
443
} struct __hack
444
445
GETADDFLAGS(8);
446
GETADDFLAGS(16);
447
GETADDFLAGS(32);
448
GETADDFLAGS(64);
449
450
static u_long
451
getaddflags(int opsize, uint64_t x, uint64_t y)
452
{
453
KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
454
("getaddflags: invalid operand size %d", opsize));
455
456
if (opsize == 1)
457
return (getaddflags8(x, y));
458
else if (opsize == 2)
459
return (getaddflags16(x, y));
460
else if (opsize == 4)
461
return (getaddflags32(x, y));
462
else
463
return (getaddflags64(x, y));
464
}
465
466
/*
467
* Return the status flags that would result from doing (x & y).
468
*/
469
#define GETANDFLAGS(sz) \
470
static u_long \
471
getandflags##sz(uint##sz##_t x, uint##sz##_t y) \
472
{ \
473
u_long rflags; \
474
\
475
__asm __volatile("and %2,%1; pushfq; popq %0" : \
476
"=r" (rflags), "+r" (x) : "m" (y)); \
477
return (rflags); \
478
} struct __hack
479
480
GETANDFLAGS(8);
481
GETANDFLAGS(16);
482
GETANDFLAGS(32);
483
GETANDFLAGS(64);
484
485
static u_long
486
getandflags(int opsize, uint64_t x, uint64_t y)
487
{
488
KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
489
("getandflags: invalid operand size %d", opsize));
490
491
if (opsize == 1)
492
return (getandflags8(x, y));
493
else if (opsize == 2)
494
return (getandflags16(x, y));
495
else if (opsize == 4)
496
return (getandflags32(x, y));
497
else
498
return (getandflags64(x, y));
499
}
500
501
static int
502
emulate_mov(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
503
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
504
{
505
int error, size;
506
enum vm_reg_name reg;
507
uint8_t byte;
508
uint64_t val;
509
510
size = vie->opsize;
511
error = EINVAL;
512
513
switch (vie->op.op_byte) {
514
case 0x88:
515
/*
516
* MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
517
* 88/r: mov r/m8, r8
518
* REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
519
*/
520
size = 1; /* override for byte operation */
521
error = vie_read_bytereg(vcpu, vie, &byte);
522
if (error == 0)
523
error = memwrite(vcpu, gpa, byte, size, arg);
524
break;
525
case 0x89:
526
/*
527
* MOV from reg (ModRM:reg) to mem (ModRM:r/m)
528
* 89/r: mov r/m16, r16
529
* 89/r: mov r/m32, r32
530
* REX.W + 89/r mov r/m64, r64
531
*/
532
reg = gpr_map[vie->reg];
533
error = vie_read_register(vcpu, reg, &val);
534
if (error == 0) {
535
val &= size2mask[size];
536
error = memwrite(vcpu, gpa, val, size, arg);
537
}
538
break;
539
case 0x8A:
540
/*
541
* MOV byte from mem (ModRM:r/m) to reg (ModRM:reg)
542
* 8A/r: mov r8, r/m8
543
* REX + 8A/r: mov r8, r/m8
544
*/
545
size = 1; /* override for byte operation */
546
error = memread(vcpu, gpa, &val, size, arg);
547
if (error == 0)
548
error = vie_write_bytereg(vcpu, vie, val);
549
break;
550
case 0x8B:
551
/*
552
* MOV from mem (ModRM:r/m) to reg (ModRM:reg)
553
* 8B/r: mov r16, r/m16
554
* 8B/r: mov r32, r/m32
555
* REX.W 8B/r: mov r64, r/m64
556
*/
557
error = memread(vcpu, gpa, &val, size, arg);
558
if (error == 0) {
559
reg = gpr_map[vie->reg];
560
error = vie_update_register(vcpu, reg, val, size);
561
}
562
break;
563
case 0xA1:
564
/*
565
* MOV from seg:moffset to AX/EAX/RAX
566
* A1: mov AX, moffs16
567
* A1: mov EAX, moffs32
568
* REX.W + A1: mov RAX, moffs64
569
*/
570
error = memread(vcpu, gpa, &val, size, arg);
571
if (error == 0) {
572
reg = VM_REG_GUEST_RAX;
573
error = vie_update_register(vcpu, reg, val, size);
574
}
575
break;
576
case 0xA3:
577
/*
578
* MOV from AX/EAX/RAX to seg:moffset
579
* A3: mov moffs16, AX
580
* A3: mov moffs32, EAX
581
* REX.W + A3: mov moffs64, RAX
582
*/
583
error = vie_read_register(vcpu, VM_REG_GUEST_RAX, &val);
584
if (error == 0) {
585
val &= size2mask[size];
586
error = memwrite(vcpu, gpa, val, size, arg);
587
}
588
break;
589
case 0xC6:
590
/*
591
* MOV from imm8 to mem (ModRM:r/m)
592
* C6/0 mov r/m8, imm8
593
* REX + C6/0 mov r/m8, imm8
594
*/
595
size = 1; /* override for byte operation */
596
error = memwrite(vcpu, gpa, vie->immediate, size, arg);
597
break;
598
case 0xC7:
599
/*
600
* MOV from imm16/imm32 to mem (ModRM:r/m)
601
* C7/0 mov r/m16, imm16
602
* C7/0 mov r/m32, imm32
603
* REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
604
*/
605
val = vie->immediate & size2mask[size];
606
error = memwrite(vcpu, gpa, val, size, arg);
607
break;
608
default:
609
break;
610
}
611
612
return (error);
613
}
614
615
static int
616
emulate_movx(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
617
mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg)
618
{
619
int error, size;
620
enum vm_reg_name reg;
621
uint64_t val;
622
623
size = vie->opsize;
624
error = EINVAL;
625
626
switch (vie->op.op_byte) {
627
case 0xB6:
628
/*
629
* MOV and zero extend byte from mem (ModRM:r/m) to
630
* reg (ModRM:reg).
631
*
632
* 0F B6/r movzx r16, r/m8
633
* 0F B6/r movzx r32, r/m8
634
* REX.W + 0F B6/r movzx r64, r/m8
635
*/
636
637
/* get the first operand */
638
error = memread(vcpu, gpa, &val, 1, arg);
639
if (error)
640
break;
641
642
/* get the second operand */
643
reg = gpr_map[vie->reg];
644
645
/* zero-extend byte */
646
val = (uint8_t)val;
647
648
/* write the result */
649
error = vie_update_register(vcpu, reg, val, size);
650
break;
651
case 0xB7:
652
/*
653
* MOV and zero extend word from mem (ModRM:r/m) to
654
* reg (ModRM:reg).
655
*
656
* 0F B7/r movzx r32, r/m16
657
* REX.W + 0F B7/r movzx r64, r/m16
658
*/
659
error = memread(vcpu, gpa, &val, 2, arg);
660
if (error)
661
return (error);
662
663
reg = gpr_map[vie->reg];
664
665
/* zero-extend word */
666
val = (uint16_t)val;
667
668
error = vie_update_register(vcpu, reg, val, size);
669
break;
670
case 0xBE:
671
/*
672
* MOV and sign extend byte from mem (ModRM:r/m) to
673
* reg (ModRM:reg).
674
*
675
* 0F BE/r movsx r16, r/m8
676
* 0F BE/r movsx r32, r/m8
677
* REX.W + 0F BE/r movsx r64, r/m8
678
*/
679
680
/* get the first operand */
681
error = memread(vcpu, gpa, &val, 1, arg);
682
if (error)
683
break;
684
685
/* get the second operand */
686
reg = gpr_map[vie->reg];
687
688
/* sign extend byte */
689
val = (int8_t)val;
690
691
/* write the result */
692
error = vie_update_register(vcpu, reg, val, size);
693
break;
694
default:
695
break;
696
}
697
return (error);
698
}
699
700
/*
701
* Helper function to calculate and validate a linear address.
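* Injects #GP, #SS or #AC and sets *fault to 1 when the address fails
* the segmentation, canonical or alignment checks; otherwise *fault is
* set to 0 and *gla holds the computed linear address.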
702
*/
703
static int
704
get_gla(struct vcpu *vcpu, struct vie *vie __unused,
705
struct vm_guest_paging *paging, int opsize, int addrsize, int prot,
706
enum vm_reg_name seg, enum vm_reg_name gpr, uint64_t *gla, int *fault)
707
{
708
struct seg_desc desc;
709
uint64_t cr0, val, rflags;
710
int error __diagused;
711
712
error = vie_read_register(vcpu, VM_REG_GUEST_CR0, &cr0);
713
KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
714
715
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
716
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
717
718
error = vm_get_seg_desc(vcpu, seg, &desc);
719
KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
720
__func__, error, seg));
721
722
error = vie_read_register(vcpu, gpr, &val);
723
KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
724
error, gpr));
725
726
if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize,
727
addrsize, prot, gla)) {
728
if (seg == VM_REG_GUEST_SS)
729
vm_inject_ss(vcpu, 0);
730
else
731
vm_inject_gp(vcpu);
732
goto guest_fault;
733
}
734
735
if (vie_canonical_check(paging->cpu_mode, *gla)) {
736
if (seg == VM_REG_GUEST_SS)
737
vm_inject_ss(vcpu, 0);
738
else
739
vm_inject_gp(vcpu);
740
goto guest_fault;
741
}
742
743
if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
744
vm_inject_ac(vcpu, 0);
745
goto guest_fault;
746
}
747
748
*fault = 0;
749
return (0);
750
751
guest_fault:
752
*fault = 1;
753
return (0);
754
}
755
756
static int
757
emulate_movs(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
758
struct vm_guest_paging *paging, mem_region_read_t memread,
759
mem_region_write_t memwrite, void *arg)
760
{
761
#ifdef _KERNEL
762
struct vm_copyinfo copyinfo[2];
763
#else
764
struct iovec copyinfo[2];
765
#endif
766
uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val;
767
uint64_t rcx, rdi, rsi, rflags;
768
int error, fault, opsize, seg, repeat;
769
770
opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
771
val = 0;
772
error = 0;
773
774
/*
775
* XXX although the MOVS instruction is only supposed to be used with
776
* the "rep" prefix some guests like FreeBSD will use "repnz" instead.
777
*
778
* Empirically the "repnz" prefix has identical behavior to "rep"
779
* and the zero flag does not make a difference.
780
*/
781
repeat = vie->repz_present | vie->repnz_present;
782
783
if (repeat) {
784
error = vie_read_register(vcpu, VM_REG_GUEST_RCX, &rcx);
785
KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
786
787
/*
788
* The count register is %rcx, %ecx or %cx depending on the
789
* address size of the instruction.
790
*/
791
if ((rcx & vie_size2mask(vie->addrsize)) == 0) {
792
error = 0;
793
goto done;
794
}
795
}
796
797
/*
798
* Source Destination Comments
799
* --------------------------------------------
800
* (1) memory memory n/a
801
* (2) memory mmio emulated
802
* (3) mmio memory emulated
803
* (4) mmio mmio emulated
804
*
805
* At this point we don't have sufficient information to distinguish
806
* between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
807
* out because it will succeed only when operating on regular memory.
808
*
809
* XXX the emulation doesn't properly handle the case where 'gpa'
810
* is straddling the boundary between the normal memory and MMIO.
811
*/
812
813
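/*
* The MOVS source is seg:(%rsi), where 'seg' defaults to %ds but honors
* a segment override prefix; the destination is always %es:(%rdi), which
* is why VM_REG_GUEST_ES is used unconditionally below.
*/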
seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
814
error = get_gla(vcpu, vie, paging, opsize, vie->addrsize,
815
PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault);
816
if (error || fault)
817
goto done;
818
819
error = vm_copy_setup(vcpu, paging, srcaddr, opsize, PROT_READ,
820
copyinfo, nitems(copyinfo), &fault);
821
if (error == 0) {
822
if (fault)
823
goto done; /* Resume guest to handle fault */
824
825
/*
826
* case (2): read from system memory and write to mmio.
827
*/
828
vm_copyin(copyinfo, &val, opsize);
829
vm_copy_teardown(copyinfo, nitems(copyinfo));
830
error = memwrite(vcpu, gpa, val, opsize, arg);
831
if (error)
832
goto done;
833
} else {
834
/*
835
* 'vm_copy_setup()' is expected to fail for cases (3) and (4)
836
* if 'srcaddr' is in the mmio space.
837
*/
838
839
error = get_gla(vcpu, vie, paging, opsize, vie->addrsize,
840
PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr,
841
&fault);
842
if (error || fault)
843
goto done;
844
845
error = vm_copy_setup(vcpu, paging, dstaddr, opsize,
846
PROT_WRITE, copyinfo, nitems(copyinfo), &fault);
847
if (error == 0) {
848
if (fault)
849
goto done; /* Resume guest to handle fault */
850
851
/*
852
* case (3): read from MMIO and write to system memory.
853
*
854
* A MMIO read can have side-effects so we
855
* commit to it only after vm_copy_setup() is
856
* successful. If a page-fault needs to be
857
* injected into the guest then it will happen
858
* before the MMIO read is attempted.
859
*/
860
error = memread(vcpu, gpa, &val, opsize, arg);
861
if (error)
862
goto done;
863
864
vm_copyout(&val, copyinfo, opsize);
865
vm_copy_teardown(copyinfo, nitems(copyinfo));
866
} else {
867
/*
868
* Case (4): read from and write to mmio.
869
*
870
* Commit to the MMIO read/write (with potential
871
* side-effects) only after we are sure that the
872
* instruction is not going to be restarted due
873
* to address translation faults.
874
*/
875
error = vm_gla2gpa(vcpu, paging, srcaddr,
876
PROT_READ, &srcgpa, &fault);
877
if (error || fault)
878
goto done;
879
880
error = vm_gla2gpa(vcpu, paging, dstaddr,
881
PROT_WRITE, &dstgpa, &fault);
882
if (error || fault)
883
goto done;
884
885
error = memread(vcpu, srcgpa, &val, opsize, arg);
886
if (error)
887
goto done;
888
889
error = memwrite(vcpu, dstgpa, val, opsize, arg);
890
if (error)
891
goto done;
892
}
893
}
894
895
error = vie_read_register(vcpu, VM_REG_GUEST_RSI, &rsi);
896
KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
897
898
error = vie_read_register(vcpu, VM_REG_GUEST_RDI, &rdi);
899
KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
900
901
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
902
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
903
904
if (rflags & PSL_D) {
905
rsi -= opsize;
906
rdi -= opsize;
907
} else {
908
rsi += opsize;
909
rdi += opsize;
910
}
911
912
error = vie_update_register(vcpu, VM_REG_GUEST_RSI, rsi,
913
vie->addrsize);
914
KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error));
915
916
error = vie_update_register(vcpu, VM_REG_GUEST_RDI, rdi,
917
vie->addrsize);
918
KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
919
920
if (repeat) {
921
rcx = rcx - 1;
922
error = vie_update_register(vcpu, VM_REG_GUEST_RCX,
923
rcx, vie->addrsize);
924
KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
925
926
/*
927
* Repeat the instruction if the count register is not zero.
928
*/
929
if ((rcx & vie_size2mask(vie->addrsize)) != 0)
930
vm_restart_instruction(vcpu);
931
}
932
done:
933
KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d",
934
__func__, error));
935
return (error);
936
}
937
938
static int
939
emulate_stos(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
940
struct vm_guest_paging *paging __unused, mem_region_read_t memread __unused,
941
mem_region_write_t memwrite, void *arg)
942
{
943
int error, opsize, repeat;
944
uint64_t val;
945
uint64_t rcx, rdi, rflags;
946
947
opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize;
948
repeat = vie->repz_present | vie->repnz_present;
949
950
if (repeat) {
951
error = vie_read_register(vcpu, VM_REG_GUEST_RCX, &rcx);
952
KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
953
954
/*
955
* The count register is %rcx, %ecx or %cx depending on the
956
* address size of the instruction.
957
*/
958
if ((rcx & vie_size2mask(vie->addrsize)) == 0)
959
return (0);
960
}
961
962
error = vie_read_register(vcpu, VM_REG_GUEST_RAX, &val);
963
KASSERT(!error, ("%s: error %d getting rax", __func__, error));
964
965
error = memwrite(vcpu, gpa, val, opsize, arg);
966
if (error)
967
return (error);
968
969
error = vie_read_register(vcpu, VM_REG_GUEST_RDI, &rdi);
970
KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
971
972
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
973
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
974
975
if (rflags & PSL_D)
976
rdi -= opsize;
977
else
978
rdi += opsize;
979
980
error = vie_update_register(vcpu, VM_REG_GUEST_RDI, rdi,
981
vie->addrsize);
982
KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
983
984
if (repeat) {
985
rcx = rcx - 1;
986
error = vie_update_register(vcpu, VM_REG_GUEST_RCX,
987
rcx, vie->addrsize);
988
KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
989
990
/*
991
* Repeat the instruction if the count register is not zero.
992
*/
993
if ((rcx & vie_size2mask(vie->addrsize)) != 0)
994
vm_restart_instruction(vcpu);
995
}
996
997
return (0);
998
}
999
1000
static int
1001
emulate_and(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1002
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
1003
{
1004
int error, size;
1005
enum vm_reg_name reg;
1006
uint64_t result, rflags, rflags2, val1, val2;
1007
1008
size = vie->opsize;
1009
error = EINVAL;
1010
1011
switch (vie->op.op_byte) {
1012
case 0x23:
1013
/*
1014
* AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
1015
* result in reg.
1016
*
1017
* 23/r and r16, r/m16
1018
* 23/r and r32, r/m32
1019
* REX.W + 23/r and r64, r/m64
1020
*/
1021
1022
/* get the first operand */
1023
reg = gpr_map[vie->reg];
1024
error = vie_read_register(vcpu, reg, &val1);
1025
if (error)
1026
break;
1027
1028
/* get the second operand */
1029
error = memread(vcpu, gpa, &val2, size, arg);
1030
if (error)
1031
break;
1032
1033
/* perform the operation and write the result */
1034
result = val1 & val2;
1035
error = vie_update_register(vcpu, reg, result, size);
1036
break;
1037
case 0x81:
1038
case 0x83:
1039
/*
1040
* AND mem (ModRM:r/m) with immediate and store the
1041
* result in mem.
1042
*
1043
* 81 /4 and r/m16, imm16
1044
* 81 /4 and r/m32, imm32
1045
* REX.W + 81 /4 and r/m64, imm32 sign-extended to 64
1046
*
1047
* 83 /4 and r/m16, imm8 sign-extended to 16
1048
* 83 /4 and r/m32, imm8 sign-extended to 32
1049
* REX.W + 83/4 and r/m64, imm8 sign-extended to 64
1050
*/
1051
1052
/* get the first operand */
1053
error = memread(vcpu, gpa, &val1, size, arg);
1054
if (error)
1055
break;
1056
1057
/*
1058
* perform the operation with the pre-fetched immediate
1059
* operand and write the result
1060
*/
1061
result = val1 & vie->immediate;
1062
error = memwrite(vcpu, gpa, result, size, arg);
1063
break;
1064
default:
1065
break;
1066
}
1067
if (error)
1068
return (error);
1069
1070
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
1071
if (error)
1072
return (error);
1073
1074
/*
1075
* OF and CF are cleared; the SF, ZF and PF flags are set according
1076
* to the result; AF is undefined.
1077
*
1078
* The updated status flags are obtained by subtracting 0 from 'result'.
1079
*/
1080
rflags2 = getcc(size, result, 0);
1081
rflags &= ~RFLAGS_STATUS_BITS;
1082
rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
1083
1084
error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8);
1085
return (error);
1086
}
1087
1088
static int
1089
emulate_or(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1090
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
1091
{
1092
int error, size;
1093
enum vm_reg_name reg;
1094
uint64_t result, rflags, rflags2, val1, val2;
1095
1096
size = vie->opsize;
1097
error = EINVAL;
1098
1099
switch (vie->op.op_byte) {
1100
case 0x0B:
1101
/*
1102
* OR reg (ModRM:reg) and mem (ModRM:r/m) and store the
1103
* result in reg.
1104
*
1105
* 0b/r or r16, r/m16
1106
* 0b/r or r32, r/m32
1107
* REX.W + 0b/r or r64, r/m64
1108
*/
1109
1110
/* get the first operand */
1111
reg = gpr_map[vie->reg];
1112
error = vie_read_register(vcpu, reg, &val1);
1113
if (error)
1114
break;
1115
1116
/* get the second operand */
1117
error = memread(vcpu, gpa, &val2, size, arg);
1118
if (error)
1119
break;
1120
1121
/* perform the operation and write the result */
1122
result = val1 | val2;
1123
error = vie_update_register(vcpu, reg, result, size);
1124
break;
1125
case 0x81:
1126
case 0x83:
1127
/*
1128
* OR mem (ModRM:r/m) with immediate and store the
1129
* result in mem.
1130
*
1131
* 81 /1 or r/m16, imm16
1132
* 81 /1 or r/m32, imm32
1133
* REX.W + 81 /1 or r/m64, imm32 sign-extended to 64
1134
*
1135
* 83 /1 or r/m16, imm8 sign-extended to 16
1136
* 83 /1 or r/m32, imm8 sign-extended to 32
1137
* REX.W + 83/1 or r/m64, imm8 sign-extended to 64
1138
*/
1139
1140
/* get the first operand */
1141
error = memread(vcpu, gpa, &val1, size, arg);
1142
if (error)
1143
break;
1144
1145
/*
1146
* perform the operation with the pre-fetched immediate
1147
* operand and write the result
1148
*/
1149
result = val1 | vie->immediate;
1150
error = memwrite(vcpu, gpa, result, size, arg);
1151
break;
1152
default:
1153
break;
1154
}
1155
if (error)
1156
return (error);
1157
1158
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
1159
if (error)
1160
return (error);
1161
1162
/*
1163
* OF and CF are cleared; the SF, ZF and PF flags are set according
1164
* to the result; AF is undefined.
1165
*
1166
* The updated status flags are obtained by subtracting 0 from 'result'.
1167
*/
1168
rflags2 = getcc(size, result, 0);
1169
rflags &= ~RFLAGS_STATUS_BITS;
1170
rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
1171
1172
error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8);
1173
return (error);
1174
}
1175
1176
static int
1177
emulate_cmp(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1178
mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg)
1179
{
1180
int error, size;
1181
uint64_t regop, memop, op1, op2, rflags, rflags2;
1182
enum vm_reg_name reg;
1183
1184
size = vie->opsize;
1185
switch (vie->op.op_byte) {
1186
case 0x39:
1187
case 0x3B:
1188
/*
1189
* 39/r CMP r/m16, r16
1190
* 39/r CMP r/m32, r32
1191
* REX.W 39/r CMP r/m64, r64
1192
*
1193
* 3B/r CMP r16, r/m16
1194
* 3B/r CMP r32, r/m32
1195
* REX.W + 3B/r CMP r64, r/m64
1196
*
1197
* Compare the first operand with the second operand and
1198
* set status flags in EFLAGS register. The comparison is
1199
* performed by subtracting the second operand from the first
1200
* operand and then setting the status flags.
1201
*/
1202
1203
/* Get the register operand */
1204
reg = gpr_map[vie->reg];
1205
error = vie_read_register(vcpu, reg, &regop);
1206
if (error)
1207
return (error);
1208
1209
/* Get the memory operand */
1210
error = memread(vcpu, gpa, &memop, size, arg);
1211
if (error)
1212
return (error);
1213
1214
if (vie->op.op_byte == 0x3B) {
1215
op1 = regop;
1216
op2 = memop;
1217
} else {
1218
op1 = memop;
1219
op2 = regop;
1220
}
1221
rflags2 = getcc(size, op1, op2);
1222
break;
1223
case 0x80:
1224
case 0x81:
1225
case 0x83:
1226
/*
1227
* 80 /7 cmp r/m8, imm8
1228
* REX + 80 /7 cmp r/m8, imm8
1229
*
1230
* 81 /7 cmp r/m16, imm16
1231
* 81 /7 cmp r/m32, imm32
1232
* REX.W + 81 /7 cmp r/m64, imm32 sign-extended to 64
1233
*
1234
* 83 /7 cmp r/m16, imm8 sign-extended to 16
1235
* 83 /7 cmp r/m32, imm8 sign-extended to 32
1236
* REX.W + 83 /7 cmp r/m64, imm8 sign-extended to 64
1237
*
1238
* Compare mem (ModRM:r/m) with immediate and set
1239
* status flags according to the results. The
1240
* comparison is performed by subtracting the
1241
* immediate from the first operand and then setting
1242
* the status flags.
1243
*
1244
*/
1245
if (vie->op.op_byte == 0x80)
1246
size = 1;
1247
1248
/* get the first operand */
1249
error = memread(vcpu, gpa, &op1, size, arg);
1250
if (error)
1251
return (error);
1252
1253
rflags2 = getcc(size, op1, vie->immediate);
1254
break;
1255
default:
1256
return (EINVAL);
1257
}
1258
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
1259
if (error)
1260
return (error);
1261
rflags &= ~RFLAGS_STATUS_BITS;
1262
rflags |= rflags2 & RFLAGS_STATUS_BITS;
1263
1264
error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8);
1265
return (error);
1266
}
1267
1268
static int
1269
emulate_test(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1270
mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg)
1271
{
1272
int error, size;
1273
uint64_t op1, rflags, rflags2;
1274
1275
size = vie->opsize;
1276
error = EINVAL;
1277
1278
switch (vie->op.op_byte) {
1279
case 0xF6:
1280
/*
1281
* F6 /0 test r/m8, imm8
1282
*/
1283
size = 1; /* override for byte operation */
1284
/* FALLTHROUGH */
1285
case 0xF7:
1286
/*
1287
* F7 /0 test r/m16, imm16
1288
* F7 /0 test r/m32, imm32
1289
* REX.W + F7 /0 test r/m64, imm32 sign-extended to 64
1290
*
1291
* Test mem (ModRM:r/m) with immediate and set status
1292
* flags according to the results. The comparison is
1293
* performed by ANDing the immediate with the first
1294
* operand and then setting the status flags.
1295
*/
1296
if ((vie->reg & 7) != 0)
1297
return (EINVAL);
1298
1299
error = memread(vcpu, gpa, &op1, size, arg);
1300
if (error)
1301
return (error);
1302
1303
rflags2 = getandflags(size, op1, vie->immediate);
1304
break;
1305
default:
1306
return (EINVAL);
1307
}
1308
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
1309
if (error)
1310
return (error);
1311
1312
/*
1313
* OF and CF are cleared; the SF, ZF and PF flags are set according
1314
* to the result; AF is undefined.
1315
*/
1316
rflags &= ~RFLAGS_STATUS_BITS;
1317
rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
1318
1319
error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8);
1320
return (error);
1321
}
1322
1323
static int
1324
emulate_bextr(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1325
struct vm_guest_paging *paging, mem_region_read_t memread,
1326
mem_region_write_t memwrite __unused, void *arg)
1327
{
1328
uint64_t src1, src2, dst, rflags;
1329
unsigned start, len, size;
1330
int error;
1331
1332
size = vie->opsize;
1333
error = EINVAL;
1334
1335
/*
1336
* VEX.LZ.0F38.W0 F7 /r BEXTR r32a, r/m32, r32b
1337
* VEX.LZ.0F38.W1 F7 /r BEXTR r64a, r/m64, r64b
1338
*
1339
* Destination operand is ModRM:reg. Source operands are ModRM:r/m and
1340
* Vex.vvvv.
1341
*
1342
* Operand size is always 32-bit if not in 64-bit mode (W1 is ignored).
1343
*/
1344
if (size != 4 && paging->cpu_mode != CPU_MODE_64BIT)
1345
size = 4;
1346
1347
/*
1348
* Extracts contiguous bits from the first /source/ operand (second
1349
* operand) using an index and length specified in the second /source/
1350
* operand (third operand).
1351
*/
1352
error = memread(vcpu, gpa, &src1, size, arg);
1353
if (error)
1354
return (error);
1355
error = vie_read_register(vcpu, gpr_map[vie->vex_reg], &src2);
1356
if (error)
1357
return (error);
1358
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
1359
if (error)
1360
return (error);
1361
1362
start = (src2 & 0xff);
1363
len = (src2 & 0xff00) >> 8;
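/*
* e.g. src2 = 0x0805 selects start = 5 and len = 8, extracting
* (src1 >> 5) & 0xff.
*/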
1364
1365
/* If no bits are extracted, the destination register is cleared. */
1366
dst = 0;
1367
1368
/* If START exceeds the operand size, no bits are extracted. */
1369
if (start > size * 8)
1370
goto done;
1371
/* Length is bounded by both the destination size and start offset. */
1372
if (start + len > size * 8)
1373
len = (size * 8) - start;
1374
if (len == 0)
1375
goto done;
1376
1377
if (start > 0)
1378
src1 = (src1 >> start);
1379
if (len < 64)
1380
src1 = src1 & ((1ull << len) - 1);
1381
dst = src1;
1382
1383
done:
1384
error = vie_update_register(vcpu, gpr_map[vie->reg], dst, size);
1385
if (error)
1386
return (error);
1387
1388
/*
1389
* AMD: OF, CF cleared; SF/AF/PF undefined; ZF set by result.
1390
* Intel: ZF is set by result; AF/SF/PF undefined; all others cleared.
1391
*/
1392
rflags &= ~RFLAGS_STATUS_BITS;
1393
if (dst == 0)
1394
rflags |= PSL_Z;
1395
error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags,
1396
8);
1397
return (error);
1398
}
1399
1400
static int
1401
emulate_add(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1402
mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg)
1403
{
1404
int error, size;
1405
uint64_t nval, rflags, rflags2, val1, val2;
1406
enum vm_reg_name reg;
1407
1408
size = vie->opsize;
1409
error = EINVAL;
1410
1411
switch (vie->op.op_byte) {
1412
case 0x03:
1413
/*
1414
* ADD r/m to r and store the result in r
1415
*
1416
* 03/r ADD r16, r/m16
1417
* 03/r ADD r32, r/m32
1418
* REX.W + 03/r ADD r64, r/m64
1419
*/
1420
1421
/* get the first operand */
1422
reg = gpr_map[vie->reg];
1423
error = vie_read_register(vcpu, reg, &val1);
1424
if (error)
1425
break;
1426
1427
/* get the second operand */
1428
error = memread(vcpu, gpa, &val2, size, arg);
1429
if (error)
1430
break;
1431
1432
/* perform the operation and write the result */
1433
nval = val1 + val2;
1434
error = vie_update_register(vcpu, reg, nval, size);
1435
break;
1436
default:
1437
break;
1438
}
1439
1440
if (!error) {
1441
rflags2 = getaddflags(size, val1, val2);
1442
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS,
1443
&rflags);
1444
if (error)
1445
return (error);
1446
1447
rflags &= ~RFLAGS_STATUS_BITS;
1448
rflags |= rflags2 & RFLAGS_STATUS_BITS;
1449
error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS,
1450
rflags, 8);
1451
}
1452
1453
return (error);
1454
}
1455
1456
static int
1457
emulate_sub(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1458
mem_region_read_t memread, mem_region_write_t memwrite __unused, void *arg)
1459
{
1460
int error, size;
1461
uint64_t nval, rflags, rflags2, val1, val2;
1462
enum vm_reg_name reg;
1463
1464
size = vie->opsize;
1465
error = EINVAL;
1466
1467
switch (vie->op.op_byte) {
1468
case 0x2B:
1469
/*
1470
* SUB r/m from r and store the result in r
1471
*
1472
* 2B/r SUB r16, r/m16
1473
* 2B/r SUB r32, r/m32
1474
* REX.W + 2B/r SUB r64, r/m64
1475
*/
1476
1477
/* get the first operand */
1478
reg = gpr_map[vie->reg];
1479
error = vie_read_register(vcpu, reg, &val1);
1480
if (error)
1481
break;
1482
1483
/* get the second operand */
1484
error = memread(vcpu, gpa, &val2, size, arg);
1485
if (error)
1486
break;
1487
1488
/* perform the operation and write the result */
1489
nval = val1 - val2;
1490
error = vie_update_register(vcpu, reg, nval, size);
1491
break;
1492
default:
1493
break;
1494
}
1495
1496
if (!error) {
1497
rflags2 = getcc(size, val1, val2);
1498
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS,
1499
&rflags);
1500
if (error)
1501
return (error);
1502
1503
rflags &= ~RFLAGS_STATUS_BITS;
1504
rflags |= rflags2 & RFLAGS_STATUS_BITS;
1505
error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS,
1506
rflags, 8);
1507
}
1508
1509
return (error);
1510
}
1511
1512
static int
1513
emulate_stack_op(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie,
1514
struct vm_guest_paging *paging, mem_region_read_t memread,
1515
mem_region_write_t memwrite, void *arg)
1516
{
1517
#ifdef _KERNEL
1518
struct vm_copyinfo copyinfo[2];
1519
#else
1520
struct iovec copyinfo[2];
1521
#endif
1522
struct seg_desc ss_desc;
1523
uint64_t cr0, rflags, rsp, stack_gla, val;
1524
int error, fault, size, stackaddrsize, pushop;
1525
1526
val = 0;
1527
size = vie->opsize;
1528
pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;
1529
1530
/*
1531
* From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
1532
*/
1533
if (paging->cpu_mode == CPU_MODE_REAL) {
1534
stackaddrsize = 2;
1535
} else if (paging->cpu_mode == CPU_MODE_64BIT) {
1536
/*
1537
* "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3
1538
* - Stack pointer size is always 64-bits.
1539
* - PUSH/POP of 32-bit values is not possible in 64-bit mode.
1540
* - 16-bit PUSH/POP is supported by using the operand size
1541
* override prefix (66H).
1542
*/
1543
stackaddrsize = 8;
1544
size = vie->opsize_override ? 2 : 8;
1545
} else {
1546
/*
1547
* In protected or compatibility mode the 'B' flag in the
1548
* stack-segment descriptor determines the size of the
1549
* stack pointer.
1550
*/
1551
error = vm_get_seg_desc(vcpu, VM_REG_GUEST_SS, &ss_desc);
1552
KASSERT(error == 0, ("%s: error %d getting SS descriptor",
1553
__func__, error));
1554
if (SEG_DESC_DEF32(ss_desc.access))
1555
stackaddrsize = 4;
1556
else
1557
stackaddrsize = 2;
1558
}
1559
1560
error = vie_read_register(vcpu, VM_REG_GUEST_CR0, &cr0);
1561
KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
1562
1563
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
1564
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
1565
1566
error = vie_read_register(vcpu, VM_REG_GUEST_RSP, &rsp);
1567
KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
1568
if (pushop) {
1569
rsp -= size;
1570
}
1571
1572
if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
1573
rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ,
1574
&stack_gla)) {
1575
vm_inject_ss(vcpu, 0);
1576
return (0);
1577
}
1578
1579
if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
1580
vm_inject_ss(vcpu, 0);
1581
return (0);
1582
}
1583
1584
if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
1585
vm_inject_ac(vcpu, 0);
1586
return (0);
1587
}
1588
1589
error = vm_copy_setup(vcpu, paging, stack_gla, size,
1590
pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo),
1591
&fault);
1592
if (error || fault)
1593
return (error);
1594
1595
if (pushop) {
1596
error = memread(vcpu, mmio_gpa, &val, size, arg);
1597
if (error == 0)
1598
vm_copyout(&val, copyinfo, size);
1599
} else {
1600
vm_copyin(copyinfo, &val, size);
1601
error = memwrite(vcpu, mmio_gpa, val, size, arg);
1602
rsp += size;
1603
}
1604
vm_copy_teardown(copyinfo, nitems(copyinfo));
1605
1606
if (error == 0) {
1607
error = vie_update_register(vcpu, VM_REG_GUEST_RSP, rsp,
1608
stackaddrsize);
1609
KASSERT(error == 0, ("error %d updating rsp", error));
1610
}
1611
return (error);
1612
}
1613
1614
static int
1615
emulate_push(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie,
1616
struct vm_guest_paging *paging, mem_region_read_t memread,
1617
mem_region_write_t memwrite, void *arg)
1618
{
1619
int error;
1620
1621
/*
1622
* Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
1623
*
1624
* PUSH is part of the group 5 extended opcodes and is identified
1625
* by ModRM:reg = b110.
1626
*/
1627
if ((vie->reg & 7) != 6)
1628
return (EINVAL);
1629
1630
error = emulate_stack_op(vcpu, mmio_gpa, vie, paging, memread,
1631
memwrite, arg);
1632
return (error);
1633
}
1634
1635
static int
1636
emulate_pop(struct vcpu *vcpu, uint64_t mmio_gpa, struct vie *vie,
1637
struct vm_guest_paging *paging, mem_region_read_t memread,
1638
mem_region_write_t memwrite, void *arg)
1639
{
1640
int error;
1641
1642
/*
1643
* Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
1644
*
1645
* POP is part of the group 1A extended opcodes and is identified
1646
* by ModRM:reg = b000.
1647
*/
1648
if ((vie->reg & 7) != 0)
1649
return (EINVAL);
1650
1651
error = emulate_stack_op(vcpu, mmio_gpa, vie, paging, memread,
1652
memwrite, arg);
1653
return (error);
1654
}
1655
1656
static int
1657
emulate_group1(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1658
struct vm_guest_paging *paging __unused, mem_region_read_t memread,
1659
mem_region_write_t memwrite, void *memarg)
1660
{
1661
int error;
1662
1663
switch (vie->reg & 7) {
1664
case 0x1: /* OR */
1665
error = emulate_or(vcpu, gpa, vie,
1666
memread, memwrite, memarg);
1667
break;
1668
case 0x4: /* AND */
1669
error = emulate_and(vcpu, gpa, vie,
1670
memread, memwrite, memarg);
1671
break;
1672
case 0x7: /* CMP */
1673
error = emulate_cmp(vcpu, gpa, vie,
1674
memread, memwrite, memarg);
1675
break;
1676
default:
1677
error = EINVAL;
1678
break;
1679
}
1680
1681
return (error);
1682
}
1683
1684
static int
1685
emulate_bittest(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1686
mem_region_read_t memread, mem_region_write_t memwrite __unused,
1687
void *memarg)
1688
{
1689
uint64_t val, rflags;
1690
int error, bitmask, bitoff;
1691
1692
/*
1693
* 0F BA is a Group 8 extended opcode.
1694
*
1695
* Currently we only emulate the 'Bit Test' instruction which is
1696
* identified by a ModR/M:reg encoding of 100b.
1697
*/
1698
if ((vie->reg & 7) != 4)
1699
return (EINVAL);
1700
1701
error = vie_read_register(vcpu, VM_REG_GUEST_RFLAGS, &rflags);
1702
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
1703
1704
error = memread(vcpu, gpa, &val, vie->opsize, memarg);
1705
if (error)
1706
return (error);
1707
1708
/*
1709
* Intel SDM, Vol 2, Table 3-2:
1710
* "Range of Bit Positions Specified by Bit Offset Operands"
1711
*/
1712
bitmask = vie->opsize * 8 - 1;
1713
bitoff = vie->immediate & bitmask;
1714
1715
/* Copy the bit into the Carry flag in %rflags */
1716
if (val & (1UL << bitoff))
1717
rflags |= PSL_C;
1718
else
1719
rflags &= ~PSL_C;
1720
1721
error = vie_update_register(vcpu, VM_REG_GUEST_RFLAGS, rflags, 8);
1722
KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error));
1723
1724
return (0);
1725
}
1726
1727
static int
1728
emulate_twob_group15(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1729
mem_region_read_t memread, mem_region_write_t memwrite __unused,
1730
void *memarg)
1731
{
1732
int error;
1733
uint64_t buf;
1734
1735
switch (vie->reg & 7) {
1736
case 0x7: /* CLFLUSH, CLFLUSHOPT, and SFENCE */
1737
if (vie->mod == 0x3) {
1738
/*
1739
* SFENCE. Ignore it, VM exit provides enough
1740
* barriers on its own.
1741
*/
1742
error = 0;
1743
} else {
1744
/*
1745
* CLFLUSH, CLFLUSHOPT. Only check for access
1746
* rights.
1747
*/
1748
error = memread(vcpu, gpa, &buf, 1, memarg);
1749
}
1750
break;
1751
default:
1752
error = EINVAL;
1753
break;
1754
}
1755
1756
return (error);
1757
}
1758
1759
int
1760
vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
1761
struct vm_guest_paging *paging, mem_region_read_t memread,
1762
mem_region_write_t memwrite, void *memarg)
1763
{
1764
int error;
1765
1766
if (!vie->decoded)
1767
return (EINVAL);
1768
1769
switch (vie->op.op_type) {
1770
case VIE_OP_TYPE_GROUP1:
1771
error = emulate_group1(vcpu, gpa, vie, paging, memread,
1772
memwrite, memarg);
1773
break;
1774
case VIE_OP_TYPE_POP:
1775
error = emulate_pop(vcpu, gpa, vie, paging, memread,
1776
memwrite, memarg);
1777
break;
1778
case VIE_OP_TYPE_PUSH:
1779
error = emulate_push(vcpu, gpa, vie, paging, memread,
1780
memwrite, memarg);
1781
break;
1782
case VIE_OP_TYPE_CMP:
1783
error = emulate_cmp(vcpu, gpa, vie,
1784
memread, memwrite, memarg);
1785
break;
1786
case VIE_OP_TYPE_MOV:
1787
error = emulate_mov(vcpu, gpa, vie,
1788
memread, memwrite, memarg);
1789
break;
1790
case VIE_OP_TYPE_MOVSX:
1791
case VIE_OP_TYPE_MOVZX:
1792
error = emulate_movx(vcpu, gpa, vie,
1793
memread, memwrite, memarg);
1794
break;
1795
case VIE_OP_TYPE_MOVS:
1796
error = emulate_movs(vcpu, gpa, vie, paging, memread,
1797
memwrite, memarg);
1798
break;
1799
case VIE_OP_TYPE_STOS:
1800
error = emulate_stos(vcpu, gpa, vie, paging, memread,
1801
memwrite, memarg);
1802
break;
1803
case VIE_OP_TYPE_AND:
1804
error = emulate_and(vcpu, gpa, vie,
1805
memread, memwrite, memarg);
1806
break;
1807
case VIE_OP_TYPE_OR:
1808
error = emulate_or(vcpu, gpa, vie,
1809
memread, memwrite, memarg);
1810
break;
1811
case VIE_OP_TYPE_SUB:
1812
error = emulate_sub(vcpu, gpa, vie,
1813
memread, memwrite, memarg);
1814
break;
1815
case VIE_OP_TYPE_BITTEST:
1816
error = emulate_bittest(vcpu, gpa, vie,
1817
memread, memwrite, memarg);
1818
break;
1819
case VIE_OP_TYPE_TWOB_GRP15:
1820
error = emulate_twob_group15(vcpu, gpa, vie,
1821
memread, memwrite, memarg);
1822
break;
1823
case VIE_OP_TYPE_ADD:
1824
error = emulate_add(vcpu, gpa, vie, memread,
1825
memwrite, memarg);
1826
break;
1827
case VIE_OP_TYPE_TEST:
1828
error = emulate_test(vcpu, gpa, vie,
1829
memread, memwrite, memarg);
1830
break;
1831
case VIE_OP_TYPE_BEXTR:
1832
error = emulate_bextr(vcpu, gpa, vie, paging,
1833
memread, memwrite, memarg);
1834
break;
1835
default:
1836
error = EINVAL;
1837
break;
1838
}
1839
1840
return (error);
1841
}
1842
1843
int
1844
vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
1845
{
1846
KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
1847
("%s: invalid size %d", __func__, size));
1848
KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));
1849
1850
if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
1851
return (0);
1852
1853
return ((gla & (size - 1)) ? 1 : 0);
1854
}
1855
1856
int
1857
vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
1858
{
1859
uint64_t mask;
1860
1861
if (cpu_mode != CPU_MODE_64BIT)
1862
return (0);
1863
1864
/*
1865
* The value of bit 47 of 'gla' should be replicated in the
1866
* most significant 16 bits.
1867
*/
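/*
* e.g. 0x00007fffffffffff and 0xffff800000000000 are canonical while
* 0x0000800000000000 is not.
*/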
1868
mask = ~((1UL << 48) - 1);
1869
if (gla & (1UL << 47))
1870
return ((gla & mask) != mask);
1871
else
1872
return ((gla & mask) != 0);
1873
}
1874
1875
uint64_t
1876
vie_size2mask(int size)
1877
{
1878
KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
1879
("vie_size2mask: invalid size %d", size));
1880
return (size2mask[size]);
1881
}
1882
1883
int
1884
vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
1885
struct seg_desc *desc, uint64_t offset, int length, int addrsize,
1886
int prot, uint64_t *gla)
1887
{
1888
uint64_t firstoff, low_limit, high_limit, segbase;
1889
int glasize, type;
1890
1891
KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
1892
("%s: invalid segment %d", __func__, seg));
1893
KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
1894
("%s: invalid operand size %d", __func__, length));
1895
KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
1896
("%s: invalid prot %#x", __func__, prot));
1897
1898
firstoff = offset;
1899
if (cpu_mode == CPU_MODE_64BIT) {
1900
KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
1901
"size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
1902
glasize = 8;
1903
} else {
1904
KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
1905
"size %d for cpu mode %d", __func__, addrsize, cpu_mode));
1906
glasize = 4;
1907
/*
1908
* If the segment selector is loaded with a NULL selector
1909
* then the descriptor is unusable and attempting to use
1910
* it results in a #GP(0).
1911
*/
1912
if (SEG_DESC_UNUSABLE(desc->access))
1913
return (-1);
1914
1915
/*
1916
* The processor generates a #NP exception when a segment
1917
* register is loaded with a selector that points to a
1918
* descriptor that is not present. If this was the case then
1919
* it would have been checked before the VM-exit.
1920
*/
1921
KASSERT(SEG_DESC_PRESENT(desc->access),
1922
("segment %d not present: %#x", seg, desc->access));
1923
1924
/*
1925
* The descriptor type must indicate a code/data segment.
1926
*/
1927
type = SEG_DESC_TYPE(desc->access);
1928
KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
1929
"descriptor type %#x", seg, type));
1930
1931
if (prot & PROT_READ) {
1932
/* #GP on a read access to a exec-only code segment */
1933
if ((type & 0xA) == 0x8)
1934
return (-1);
1935
}
1936
1937
if (prot & PROT_WRITE) {
1938
/*
1939
* #GP on a write access to a code segment or a
1940
* read-only data segment.
1941
*/
1942
if (type & 0x8) /* code segment */
1943
return (-1);
1944
1945
if ((type & 0xA) == 0) /* read-only data seg */
1946
return (-1);
1947
}
1948
1949
/*
1950
* 'desc->limit' is fully expanded taking granularity into
1951
* account.
1952
*/
1953
if ((type & 0xC) == 0x4) {
1954
/* expand-down data segment */
1955
low_limit = desc->limit + 1;
1956
high_limit = SEG_DESC_DEF32(desc->access) ?
1957
0xffffffff : 0xffff;
1958
} else {
1959
/* code segment or expand-up data segment */
1960
low_limit = 0;
1961
high_limit = desc->limit;
1962
}
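/*
* Verify that every byte of the access lies within the segment limits,
* wrapping the offset at the boundary implied by the address size.
*/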
1963
1964
while (length > 0) {
1965
offset &= vie_size2mask(addrsize);
1966
if (offset < low_limit || offset > high_limit)
1967
return (-1);
1968
offset++;
1969
length--;
1970
}
1971
}
1972
1973
/*
1974
* In 64-bit mode all segments except %fs and %gs have a segment
1975
* base address of 0.
1976
*/
1977
if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
1978
seg != VM_REG_GUEST_GS) {
1979
segbase = 0;
1980
} else {
1981
segbase = desc->base;
1982
}
1983
1984
/*
1985
* Truncate 'firstoff' to the effective address size before adding
1986
* it to the segment base.
1987
*/
1988
firstoff &= vie_size2mask(addrsize);
1989
*gla = (segbase + firstoff) & vie_size2mask(glasize);
1990
return (0);
1991
}
1992
1993
/*
1994
* Prepare a partially decoded vie for a 2nd attempt.
1995
*/
1996
void
1997
vie_restart(struct vie *vie)
1998
{
1999
_Static_assert(
2000
offsetof(struct vie, inst) < offsetof(struct vie, vie_startzero) &&
2001
offsetof(struct vie, num_valid) < offsetof(struct vie, vie_startzero),
2002
"restart should not erase instruction length or contents");
2003
2004
memset((char *)vie + offsetof(struct vie, vie_startzero), 0,
2005
sizeof(*vie) - offsetof(struct vie, vie_startzero));
2006
2007
vie->base_register = VM_REG_LAST;
2008
vie->index_register = VM_REG_LAST;
2009
vie->segment_register = VM_REG_LAST;
2010
}
2011
2012
void
2013
vie_init(struct vie *vie, const char *inst_bytes, int inst_length)
2014
{
2015
KASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE,
2016
("%s: invalid instruction length (%d)", __func__, inst_length));
2017
2018
vie_restart(vie);
2019
memset(vie->inst, 0, sizeof(vie->inst));
2020
if (inst_length != 0)
2021
memcpy(vie->inst, inst_bytes, inst_length);
2022
vie->num_valid = inst_length;
2023
}
2024
2025
#ifdef _KERNEL
2026
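/*
* Construct the PGEX_* error code that would accompany a page fault
* for this access.
*/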
static int
2027
pf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
2028
{
2029
int error_code = 0;
2030
2031
if (pte & PG_V)
2032
error_code |= PGEX_P;
2033
if (prot & VM_PROT_WRITE)
2034
error_code |= PGEX_W;
2035
if (usermode)
2036
error_code |= PGEX_U;
2037
if (rsvd)
2038
error_code |= PGEX_RSV;
2039
if (prot & VM_PROT_EXECUTE)
2040
error_code |= PGEX_I;
2041
2042
return (error_code);
2043
}
2044
2045
static void
2046
ptp_release(void **cookie)
2047
{
2048
if (*cookie != NULL) {
2049
vm_gpa_release(*cookie);
2050
*cookie = NULL;
2051
}
2052
}
2053
2054
static void *
2055
ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
2056
{
2057
void *ptr;
2058
2059
ptp_release(cookie);
2060
ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie);
2061
return (ptr);
2062
}
2063
2064
static int
2065
_vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging,
2066
uint64_t gla, int prot, uint64_t *gpa, int *guest_fault, bool check_only)
2067
{
2068
int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
2069
u_int retries;
2070
uint64_t *ptpbase, ptpphys, pte, pgsize;
2071
uint32_t *ptpbase32, pte32;
2072
void *cookie;
2073
2074
*guest_fault = 0;
2075
2076
usermode = (paging->cpl == 3 ? 1 : 0);
2077
writable = prot & VM_PROT_WRITE;
2078
cookie = NULL;
2079
retval = 0;
2080
retries = 0;
2081
restart:
2082
ptpphys = paging->cr3; /* root of the page tables */
2083
ptp_release(&cookie);
2084
if (retries++ > 0)
2085
maybe_yield();
2086
2087
if (vie_canonical_check(paging->cpu_mode, gla)) {
2088
/*
2089
* XXX assuming a non-stack reference; otherwise a stack fault
2090
* should be generated.
2091
*/
2092
if (!check_only)
2093
vm_inject_gp(vcpu);
2094
goto fault;
2095
}
2096
2097
if (paging->paging_mode == PAGING_MODE_FLAT) {
2098
*gpa = gla;
2099
goto done;
2100
}
2101
2102
if (paging->paging_mode == PAGING_MODE_32) {
2103
nlevels = 2;
2104
while (--nlevels >= 0) {
2105
/* Zero out the lower 12 bits. */
2106
ptpphys &= ~0xfff;
2107
2108
ptpbase32 = ptp_hold(vcpu, ptpphys, PAGE_SIZE,
2109
&cookie);
2110
2111
if (ptpbase32 == NULL)
2112
goto error;
2113
2114
ptpshift = PAGE_SHIFT + nlevels * 10;
2115
ptpindex = (gla >> ptpshift) & 0x3FF;
2116
pgsize = 1UL << ptpshift;
2117
2118
pte32 = ptpbase32[ptpindex];
2119
2120
if ((pte32 & PG_V) == 0 ||
2121
(usermode && (pte32 & PG_U) == 0) ||
2122
(writable && (pte32 & PG_RW) == 0)) {
2123
if (!check_only) {
2124
pfcode = pf_error_code(usermode, prot, 0,
2125
pte32);
2126
vm_inject_pf(vcpu, pfcode, gla);
2127
}
2128
goto fault;
2129
}
2130
2131
/*
2132
* Emulate the x86 MMU's management of the accessed
2133
* and dirty flags. While the accessed flag is set
2134
* at every level of the page table, the dirty flag
2135
* is only set at the last level providing the guest
2136
* physical address.
2137
*/
2138
if (!check_only && (pte32 & PG_A) == 0) {
2139
if (atomic_cmpset_32(&ptpbase32[ptpindex],
2140
pte32, pte32 | PG_A) == 0) {
2141
goto restart;
2142
}
2143
}
2144
2145
/* XXX must be ignored if CR4.PSE=0 */
2146
if (nlevels > 0 && (pte32 & PG_PS) != 0)
2147
break;
2148
2149
ptpphys = pte32;
2150
}
2151
2152
/* Set the dirty bit in the page table entry if necessary */
2153
if (!check_only && writable && (pte32 & PG_M) == 0) {
2154
if (atomic_cmpset_32(&ptpbase32[ptpindex],
2155
pte32, pte32 | PG_M) == 0) {
2156
goto restart;
2157
}
2158
}
2159
2160
/* Zero out the lower 'ptpshift' bits */
2161
pte32 >>= ptpshift; pte32 <<= ptpshift;
2162
*gpa = pte32 | (gla & (pgsize - 1));
2163
goto done;
2164
}
2165
2166
if (paging->paging_mode == PAGING_MODE_PAE) {
2167
/* Zero out the lower 5 bits and the upper 32 bits */
2168
ptpphys &= 0xffffffe0UL;
2169
2170
ptpbase = ptp_hold(vcpu, ptpphys, sizeof(*ptpbase) * 4,
2171
&cookie);
2172
if (ptpbase == NULL)
2173
goto error;
2174
2175
ptpindex = (gla >> 30) & 0x3;
2176
2177
pte = ptpbase[ptpindex];
2178
2179
if ((pte & PG_V) == 0) {
2180
if (!check_only) {
2181
pfcode = pf_error_code(usermode, prot, 0, pte);
2182
vm_inject_pf(vcpu, pfcode, gla);
2183
}
2184
goto fault;
2185
}
2186
2187
ptpphys = pte;
2188
2189
nlevels = 2;
2190
} else if (paging->paging_mode == PAGING_MODE_64_LA57) {
2191
nlevels = 5;
2192
} else {
2193
nlevels = 4;
2194
}
2195
2196
while (--nlevels >= 0) {
2197
/* Zero out the lower 12 bits and the upper 12 bits */
2198
ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
2199
2200
ptpbase = ptp_hold(vcpu, ptpphys, PAGE_SIZE, &cookie);
2201
if (ptpbase == NULL)
2202
goto error;
2203
2204
ptpshift = PAGE_SHIFT + nlevels * 9;
2205
ptpindex = (gla >> ptpshift) & 0x1FF;
2206
pgsize = 1UL << ptpshift;
2207
2208
pte = ptpbase[ptpindex];
2209
2210
if ((pte & PG_V) == 0 ||
2211
(usermode && (pte & PG_U) == 0) ||
2212
(writable && (pte & PG_RW) == 0)) {
2213
if (!check_only) {
2214
pfcode = pf_error_code(usermode, prot, 0, pte);
2215
vm_inject_pf(vcpu, pfcode, gla);
2216
}
2217
goto fault;
2218
}
2219
2220
/* Set the accessed bit in the page table entry */
2221
if (!check_only && (pte & PG_A) == 0) {
2222
if (atomic_cmpset_64(&ptpbase[ptpindex],
2223
pte, pte | PG_A) == 0) {
2224
goto restart;
2225
}
2226
}
2227
2228
if (nlevels > 0 && (pte & PG_PS) != 0) {
2229
if (pgsize > 1 * GB) {
2230
if (!check_only) {
2231
pfcode = pf_error_code(usermode, prot, 1,
2232
pte);
2233
vm_inject_pf(vcpu, pfcode, gla);
2234
}
2235
goto fault;
2236
}
2237
break;
2238
}
2239
2240
ptpphys = pte;
2241
}
2242
2243
/* Set the dirty bit in the page table entry if necessary */
2244
if (!check_only && writable && (pte & PG_M) == 0) {
2245
if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
2246
goto restart;
2247
}
2248
2249
/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
2250
pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
2251
*gpa = pte | (gla & (pgsize - 1));
2252
done:
2253
ptp_release(&cookie);
2254
KASSERT(retval == 0 || retval == EFAULT, ("%s: unexpected retval %d",
2255
__func__, retval));
2256
return (retval);
2257
error:
2258
retval = EFAULT;
2259
goto done;
2260
fault:
2261
*guest_fault = 1;
2262
goto done;
2263
}
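
/*
 * Worked example for the long-mode walk above (hypothetical values): a
 * 4-level walk uses ptpshift values of 39, 30, 21 and 12, pulling a
 * 9-bit index out of 'gla' at each level:
 *
 *	ptpindex = (gla >> ptpshift) & 0x1ff
 *
 * A PG_PS entry at the 30- or 21-bit level ends the walk early with a
 * 1GB or 2MB page (anything larger is rejected as a reserved-bit fault),
 * and the final guest physical address combines the terminal PTE with
 * the untranslated low bits of 'gla':
 *
 *	gpa = (pte with the low 'ptpshift' and upper 12 bits cleared) |
 *	      (gla & (pgsize - 1))
 */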
2264
2265
int
2266
vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging,
2267
uint64_t gla, int prot, uint64_t *gpa, int *guest_fault)
2268
{
2269
2270
return (_vm_gla2gpa(vcpu, paging, gla, prot, gpa, guest_fault,
2271
false));
2272
}
2273
2274
int
2275
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
2276
uint64_t gla, int prot, uint64_t *gpa, int *guest_fault)
2277
{
2278
2279
return (_vm_gla2gpa(vcpu, paging, gla, prot, gpa, guest_fault,
2280
true));
2281
}
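
/*
 * A minimal usage sketch of the wrapper above; 'example_translate' is a
 * hypothetical caller, not part of vmm, and assumes 'vcpu' and 'paging'
 * come from the current VM-exit context.
 */
#if 0
static int
example_translate(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, uint64_t *gpa)
{
	int error, guest_fault;

	error = vm_gla2gpa(vcpu, paging, gla, VM_PROT_READ, gpa,
	    &guest_fault);
	if (error != 0)
		return (error);		/* paging structures were unreachable */
	if (guest_fault)
		return (0);		/* #PF/#GP already injected into the guest */

	/* '*gpa' now holds the guest physical address backing 'gla'. */
	return (0);
}
#endif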
2282
2283
int
2284
vmm_fetch_instruction(struct vcpu *vcpu, struct vm_guest_paging *paging,
2285
uint64_t rip, int inst_length, struct vie *vie, int *faultptr)
2286
{
2287
struct vm_copyinfo copyinfo[2];
2288
int error, prot;
2289
2290
if (inst_length > VIE_INST_SIZE)
2291
panic("vmm_fetch_instruction: invalid length %d", inst_length);
2292
2293
prot = PROT_READ | PROT_EXEC;
2294
error = vm_copy_setup(vcpu, paging, rip, inst_length, prot,
2295
copyinfo, nitems(copyinfo), faultptr);
2296
if (error || *faultptr)
2297
return (error);
2298
2299
vm_copyin(copyinfo, vie->inst, inst_length);
2300
vm_copy_teardown(copyinfo, nitems(copyinfo));
2301
vie->num_valid = inst_length;
2302
return (0);
2303
}
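
/*
 * A minimal usage sketch of the fetch path; 'example_fetch' is a
 * hypothetical caller, not part of vmm, and assumes 'vcpu', 'paging' and
 * 'rip' are taken from the VM-exit state.
 */
#if 0
static int
example_fetch(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t rip, struct vie *vie)
{
	int error, fault;

	error = vmm_fetch_instruction(vcpu, paging, rip, VIE_INST_SIZE,
	    vie, &fault);
	if (error != 0 || fault)
		return (error);

	/* 'vie' now holds up to VIE_INST_SIZE raw instruction bytes. */
	return (0);
}
#endif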
2304
#endif /* _KERNEL */
2305
2306
static int
2307
vie_peek(struct vie *vie, uint8_t *x)
2308
{
2309
2310
if (vie->num_processed < vie->num_valid) {
2311
*x = vie->inst[vie->num_processed];
2312
return (0);
2313
} else
2314
return (-1);
2315
}
2316
2317
static void
2318
vie_advance(struct vie *vie)
2319
{
2320
2321
vie->num_processed++;
2322
}
2323
2324
static bool
2325
segment_override(uint8_t x, int *seg)
2326
{
2327
2328
switch (x) {
2329
case 0x2E:
2330
*seg = VM_REG_GUEST_CS;
2331
break;
2332
case 0x36:
2333
*seg = VM_REG_GUEST_SS;
2334
break;
2335
case 0x3E:
2336
*seg = VM_REG_GUEST_DS;
2337
break;
2338
case 0x26:
2339
*seg = VM_REG_GUEST_ES;
2340
break;
2341
case 0x64:
2342
*seg = VM_REG_GUEST_FS;
2343
break;
2344
case 0x65:
2345
*seg = VM_REG_GUEST_GS;
2346
break;
2347
default:
2348
return (false);
2349
}
2350
return (true);
2351
}
2352
2353
static int
2354
decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
2355
{
2356
uint8_t x;
2357
2358
while (1) {
2359
if (vie_peek(vie, &x))
2360
return (-1);
2361
2362
if (x == 0x66)
2363
vie->opsize_override = 1;
2364
else if (x == 0x67)
2365
vie->addrsize_override = 1;
2366
else if (x == 0xF3)
2367
vie->repz_present = 1;
2368
else if (x == 0xF2)
2369
vie->repnz_present = 1;
2370
else if (segment_override(x, &vie->segment_register))
2371
vie->segment_override = 1;
2372
else
2373
break;
2374
2375
vie_advance(vie);
2376
}
2377
2378
/*
2379
* From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
2380
* - Only one REX prefix is allowed per instruction.
2381
* - The REX prefix must immediately precede the opcode byte or the
2382
* escape opcode byte.
2383
* - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
2384
* the mandatory prefix must come before the REX prefix.
2385
*/
2386
if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
2387
vie->rex_present = 1;
2388
vie->rex_w = x & 0x8 ? 1 : 0;
2389
vie->rex_r = x & 0x4 ? 1 : 0;
2390
vie->rex_x = x & 0x2 ? 1 : 0;
2391
vie->rex_b = x & 0x1 ? 1 : 0;
2392
vie_advance(vie);
2393
}
2394
2395
/*
2396
* § 2.3.5, "The VEX Prefix", SDM Vol 2.
2397
*/
2398
if ((cpu_mode == CPU_MODE_64BIT || cpu_mode == CPU_MODE_COMPATIBILITY)
2399
&& x == 0xC4) {
2400
const struct vie_op *optab;
2401
2402
/* 3-byte VEX prefix. */
2403
vie->vex_present = 1;
2404
2405
vie_advance(vie);
2406
if (vie_peek(vie, &x))
2407
return (-1);
2408
2409
/*
2410
* 2nd byte: [R', X', B', mmmmm[4:0]]. Bits are inverted
2411
* relative to REX encoding.
2412
*/
2413
vie->rex_r = x & 0x80 ? 0 : 1;
2414
vie->rex_x = x & 0x40 ? 0 : 1;
2415
vie->rex_b = x & 0x20 ? 0 : 1;
2416
2417
switch (x & 0x1F) {
2418
case 0x2:
2419
/* 0F 38. */
2420
optab = three_byte_opcodes_0f38;
2421
break;
2422
case 0x1:
2423
/* 0F class - nothing handled here yet. */
2424
/* FALLTHROUGH */
2425
case 0x3:
2426
/* 0F 3A class - nothing handled here yet. */
2427
/* FALLTHROUGH */
2428
default:
2429
/* Reserved (#UD). */
2430
return (-1);
2431
}
2432
2433
vie_advance(vie);
2434
if (vie_peek(vie, &x))
2435
return (-1);
2436
2437
/* 3rd byte: [W, vvvv[6:3], L, pp[1:0]]. */
2438
vie->rex_w = x & 0x80 ? 1 : 0;
2439
2440
vie->vex_reg = ((~(unsigned)x & 0x78u) >> 3);
2441
vie->vex_l = !!(x & 0x4);
2442
vie->vex_pp = (x & 0x3);
2443
2444
/* PP: 1=66 2=F3 3=F2 prefixes. */
2445
switch (vie->vex_pp) {
2446
case 0x1:
2447
vie->opsize_override = 1;
2448
break;
2449
case 0x2:
2450
vie->repz_present = 1;
2451
break;
2452
case 0x3:
2453
vie->repnz_present = 1;
2454
break;
2455
}
2456
2457
vie_advance(vie);
2458
2459
/* Opcode, sans literal prefix. */
2460
if (vie_peek(vie, &x))
2461
return (-1);
2462
2463
vie->op = optab[x];
2464
if (vie->op.op_type == VIE_OP_TYPE_NONE)
2465
return (-1);
2466
2467
vie_advance(vie);
2468
}
2469
2470
/*
2471
* Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
2472
*/
2473
if (cpu_mode == CPU_MODE_64BIT) {
2474
/*
2475
* Default address size is 64-bits and default operand size
2476
* is 32-bits.
2477
*/
2478
vie->addrsize = vie->addrsize_override ? 4 : 8;
2479
if (vie->rex_w)
2480
vie->opsize = 8;
2481
else if (vie->opsize_override)
2482
vie->opsize = 2;
2483
else
2484
vie->opsize = 4;
2485
} else if (cs_d) {
2486
/* Default address and operand sizes are 32-bits */
2487
vie->addrsize = vie->addrsize_override ? 2 : 4;
2488
vie->opsize = vie->opsize_override ? 2 : 4;
2489
} else {
2490
/* Default address and operand sizes are 16-bits */
2491
vie->addrsize = vie->addrsize_override ? 4 : 2;
2492
vie->opsize = vie->opsize_override ? 4 : 2;
2493
}
2494
return (0);
2495
}
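
/*
 * Worked example (hypothetical instruction bytes): for the 64-bit mode
 * encoding 67 48 8B 08 (mov (%eax),%rcx) the prefix loop above consumes
 * 0x67 (address-size override) and the REX check consumes 0x48 (REX.W),
 * leaving
 *
 *	addrsize_override = 1, rex_present = 1, rex_w = 1
 *	addrsize = 4 (overridden from the 64-bit default of 8)
 *	opsize   = 8 (REX.W wins over the 32-bit default)
 *
 * with 8B as the next byte for decode_opcode().
 */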
2496
2497
static int
2498
decode_two_byte_opcode(struct vie *vie)
2499
{
2500
uint8_t x;
2501
2502
if (vie_peek(vie, &x))
2503
return (-1);
2504
2505
vie->op = two_byte_opcodes[x];
2506
2507
if (vie->op.op_type == VIE_OP_TYPE_NONE)
2508
return (-1);
2509
2510
vie_advance(vie);
2511
return (0);
2512
}
2513
2514
static int
2515
decode_opcode(struct vie *vie)
2516
{
2517
uint8_t x;
2518
2519
if (vie_peek(vie, &x))
2520
return (-1);
2521
2522
/* Already did this via VEX prefix. */
2523
if (vie->op.op_type != VIE_OP_TYPE_NONE)
2524
return (0);
2525
2526
vie->op = one_byte_opcodes[x];
2527
2528
if (vie->op.op_type == VIE_OP_TYPE_NONE)
2529
return (-1);
2530
2531
vie_advance(vie);
2532
2533
if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
2534
return (decode_two_byte_opcode(vie));
2535
2536
return (0);
2537
}
2538
2539
static int
2540
decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
2541
{
2542
uint8_t x;
2543
2544
if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
2545
return (0);
2546
2547
if (cpu_mode == CPU_MODE_REAL)
2548
return (-1);
2549
2550
if (vie_peek(vie, &x))
2551
return (-1);
2552
2553
vie->mod = (x >> 6) & 0x3;
2554
vie->rm = (x >> 0) & 0x7;
2555
vie->reg = (x >> 3) & 0x7;
2556
2557
/*
2558
* A direct addressing mode makes no sense in the context of an EPT
2559
* fault. There has to be a memory access involved to cause the
2560
* EPT fault.
2561
*/
2562
if (vie->mod == VIE_MOD_DIRECT)
2563
return (-1);
2564
2565
if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
2566
(vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
2567
/*
2568
* Table 2-5: Special Cases of REX Encodings
2569
*
2570
* mod=0, r/m=5 is used in the compatibility mode to
2571
* indicate a disp32 without a base register.
2572
*
2573
* mod!=3, r/m=4 is used in the compatibility mode to
2574
* indicate that the SIB byte is present.
2575
*
2576
* The 'b' bit in the REX prefix is don't care in
2577
* this case.
2578
*/
2579
} else {
2580
vie->rm |= (vie->rex_b << 3);
2581
}
2582
2583
vie->reg |= (vie->rex_r << 3);
2584
2585
/* SIB */
2586
if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
2587
goto done;
2588
2589
vie->base_register = gpr_map[vie->rm];
2590
2591
switch (vie->mod) {
2592
case VIE_MOD_INDIRECT_DISP8:
2593
vie->disp_bytes = 1;
2594
break;
2595
case VIE_MOD_INDIRECT_DISP32:
2596
vie->disp_bytes = 4;
2597
break;
2598
case VIE_MOD_INDIRECT:
2599
if (vie->rm == VIE_RM_DISP32) {
2600
vie->disp_bytes = 4;
2601
/*
2602
* Table 2-7. RIP-Relative Addressing
2603
*
2604
* In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
2605
* whereas in compatibility mode it just implies disp32.
2606
*/
2607
2608
if (cpu_mode == CPU_MODE_64BIT)
2609
vie->base_register = VM_REG_GUEST_RIP;
2610
else
2611
vie->base_register = VM_REG_LAST;
2612
}
2613
break;
2614
}
2615
2616
done:
2617
vie_advance(vie);
2618
2619
return (0);
2620
}
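
/*
 * Worked example (hypothetical encoding): a ModR/M byte of 0x48 seen
 * under a REX prefix with REX.R = 1 and REX.B = 0 splits into
 *
 *	mod = 01 (disp8 follows), reg = 001, rm = 000
 *
 * so after the REX fix-ups reg becomes 9 (%r9) and rm stays 0, giving
 * base_register = %rax with a 1-byte displacement still to be read by
 * decode_displacement().
 */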
2621
2622
static int
2623
decode_sib(struct vie *vie)
2624
{
2625
uint8_t x;
2626
2627
/* Proceed only if SIB byte is present */
2628
if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
2629
return (0);
2630
2631
if (vie_peek(vie, &x))
2632
return (-1);
2633
2634
/* De-construct the SIB byte */
2635
vie->ss = (x >> 6) & 0x3;
2636
vie->index = (x >> 3) & 0x7;
2637
vie->base = (x >> 0) & 0x7;
2638
2639
/* Apply the REX prefix modifiers */
2640
vie->index |= vie->rex_x << 3;
2641
vie->base |= vie->rex_b << 3;
2642
2643
switch (vie->mod) {
2644
case VIE_MOD_INDIRECT_DISP8:
2645
vie->disp_bytes = 1;
2646
break;
2647
case VIE_MOD_INDIRECT_DISP32:
2648
vie->disp_bytes = 4;
2649
break;
2650
}
2651
2652
if (vie->mod == VIE_MOD_INDIRECT &&
2653
(vie->base == 5 || vie->base == 13)) {
2654
/*
2655
* Special case when base register is unused if mod = 0
2656
* and base = %rbp or %r13.
2657
*
2658
* Documented in:
2659
* Table 2-3: 32-bit Addressing Forms with the SIB Byte
2660
* Table 2-5: Special Cases of REX Encodings
2661
*/
2662
vie->disp_bytes = 4;
2663
} else {
2664
vie->base_register = gpr_map[vie->base];
2665
}
2666
2667
/*
2668
* All encodings of 'index' are valid except for %rsp (4).
2669
*
2670
* Documented in:
2671
* Table 2-3: 32-bit Addressing Forms with the SIB Byte
2672
* Table 2-5: Special Cases of REX Encodings
2673
*/
2674
if (vie->index != 4)
2675
vie->index_register = gpr_map[vie->index];
2676
2677
/* 'scale' makes sense only in the context of an index register */
2678
if (vie->index_register < VM_REG_LAST)
2679
vie->scale = 1 << vie->ss;
2680
2681
vie_advance(vie);
2682
2683
return (0);
2684
}
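
/*
 * Worked example (hypothetical encoding): with mod = 10 and a SIB byte
 * of 0x88 (no REX bits set) the fields decode as
 *
 *	ss = 10, index = 001, base = 000
 *
 * giving base_register = %rax, index_register = %rcx and scale = 4,
 * with a 4-byte displacement to follow because mod = 10.
 */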
2685
2686
static int
2687
decode_displacement(struct vie *vie)
2688
{
2689
int n, i;
2690
uint8_t x;
2691
2692
union {
2693
char buf[4];
2694
int8_t signed8;
2695
int32_t signed32;
2696
} u;
2697
2698
if ((n = vie->disp_bytes) == 0)
2699
return (0);
2700
2701
if (n != 1 && n != 4)
2702
panic("decode_displacement: invalid disp_bytes %d", n);
2703
2704
for (i = 0; i < n; i++) {
2705
if (vie_peek(vie, &x))
2706
return (-1);
2707
2708
u.buf[i] = x;
2709
vie_advance(vie);
2710
}
2711
2712
if (n == 1)
2713
vie->displacement = u.signed8; /* sign-extended */
2714
else
2715
vie->displacement = u.signed32; /* sign-extended */
2716
2717
return (0);
2718
}
2719
2720
static int
2721
decode_immediate(struct vie *vie)
2722
{
2723
int i, n;
2724
uint8_t x;
2725
union {
2726
char buf[4];
2727
int8_t signed8;
2728
int16_t signed16;
2729
int32_t signed32;
2730
} u;
2731
2732
/* Figure out immediate operand size (if any) */
2733
if (vie->op.op_flags & VIE_OP_F_IMM) {
2734
/*
2735
* Section 2.2.1.5 "Immediates", Intel SDM:
2736
* In 64-bit mode the typical size of immediate operands
2737
* remains 32-bits. When the operand size is 64-bits, the
2738
* processor sign-extends all immediates to 64-bits prior
2739
* to their use.
2740
*/
2741
if (vie->opsize == 4 || vie->opsize == 8)
2742
vie->imm_bytes = 4;
2743
else
2744
vie->imm_bytes = 2;
2745
} else if (vie->op.op_flags & VIE_OP_F_IMM8) {
2746
vie->imm_bytes = 1;
2747
}
2748
2749
if ((n = vie->imm_bytes) == 0)
2750
return (0);
2751
2752
KASSERT(n == 1 || n == 2 || n == 4,
2753
("%s: invalid number of immediate bytes: %d", __func__, n));
2754
2755
for (i = 0; i < n; i++) {
2756
if (vie_peek(vie, &x))
2757
return (-1);
2758
2759
u.buf[i] = x;
2760
vie_advance(vie);
2761
}
2762
2763
/* sign-extend the immediate value before use */
2764
if (n == 1)
2765
vie->immediate = u.signed8;
2766
else if (n == 2)
2767
vie->immediate = u.signed16;
2768
else
2769
vie->immediate = u.signed32;
2770
2771
return (0);
2772
}
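
/*
 * Worked example (hypothetical bytes): an 8-bit immediate of 0x80 is
 * read into u.signed8 and sign-extended, so vie->immediate becomes
 * -128 (0xffffffffffffff80 as a 64-bit value), matching the processor's
 * treatment of imm8 operands before they are used at the full operand
 * size.
 */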
2773
2774
static int
2775
decode_moffset(struct vie *vie)
2776
{
2777
int i, n;
2778
uint8_t x;
2779
union {
2780
char buf[8];
2781
uint64_t u64;
2782
} u;
2783
2784
if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0)
2785
return (0);
2786
2787
/*
2788
* Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
2789
* The memory offset size follows the address-size of the instruction.
2790
*/
2791
n = vie->addrsize;
2792
KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));
2793
2794
u.u64 = 0;
2795
for (i = 0; i < n; i++) {
2796
if (vie_peek(vie, &x))
2797
return (-1);
2798
2799
u.buf[i] = x;
2800
vie_advance(vie);
2801
}
2802
vie->displacement = u.u64;
2803
return (0);
2804
}
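
/*
 * Worked example (hypothetical bytes): for a moffset instruction with a
 * 4-byte address size, the bytes 44 33 22 11 are assembled little-endian
 * into u.u64 = 0x11223344, which is stored in vie->displacement and later
 * combined with the segment base like any other displacement.
 */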
2805
2806
#ifdef _KERNEL
2807
/*
2808
* Verify that the 'guest linear address' provided as collateral of the nested
2809
* page table fault matches with our instruction decoding.
2810
*/
2811
static int
2812
verify_gla(struct vcpu *vcpu, uint64_t gla, struct vie *vie,
2813
enum vm_cpu_mode cpu_mode)
2814
{
2815
int error;
2816
uint64_t base, segbase, idx, gla2;
2817
enum vm_reg_name seg;
2818
struct seg_desc desc;
2819
2820
/* Skip 'gla' verification */
2821
if (gla == VIE_INVALID_GLA)
2822
return (0);
2823
2824
base = 0;
2825
if (vie->base_register != VM_REG_LAST) {
2826
error = vm_get_register(vcpu, vie->base_register, &base);
2827
if (error) {
2828
printf("verify_gla: error %d getting base reg %d\n",
2829
error, vie->base_register);
2830
return (-1);
2831
}
2832
2833
/*
2834
* RIP-relative addressing starts from the following
2835
* instruction
2836
*/
2837
if (vie->base_register == VM_REG_GUEST_RIP)
2838
base += vie->num_processed;
2839
}
2840
2841
idx = 0;
2842
if (vie->index_register != VM_REG_LAST) {
2843
error = vm_get_register(vcpu, vie->index_register, &idx);
2844
if (error) {
2845
printf("verify_gla: error %d getting index reg %d\n",
2846
error, vie->index_register);
2847
return (-1);
2848
}
2849
}
2850
2851
/*
2852
* From "Specifying a Segment Selector", Intel SDM, Vol 1
2853
*
2854
* In 64-bit mode, segmentation is generally (but not
2855
* completely) disabled. The exceptions are the FS and GS
2856
* segments.
2857
*
2858
* In legacy IA-32 mode, when the ESP or EBP register is used
2859
* as the base, the SS segment is the default segment. For
2860
* other data references, except when relative to stack or
2861
* string destination the DS segment is the default. These
2862
* can be overridden to allow other segments to be accessed.
2863
*/
2864
if (vie->segment_override)
2865
seg = vie->segment_register;
2866
else if (vie->base_register == VM_REG_GUEST_RSP ||
2867
vie->base_register == VM_REG_GUEST_RBP)
2868
seg = VM_REG_GUEST_SS;
2869
else
2870
seg = VM_REG_GUEST_DS;
2871
if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
2872
seg != VM_REG_GUEST_GS) {
2873
segbase = 0;
2874
} else {
2875
error = vm_get_seg_desc(vcpu, seg, &desc);
2876
if (error) {
2877
printf("verify_gla: error %d getting segment"
2878
" descriptor %d", error,
2879
vie->segment_register);
2880
return (-1);
2881
}
2882
segbase = desc.base;
2883
}
2884
2885
gla2 = segbase + base + vie->scale * idx + vie->displacement;
2886
gla2 &= size2mask[vie->addrsize];
2887
if (gla != gla2) {
2888
printf("verify_gla mismatch: segbase(0x%0lx)"
2889
"base(0x%0lx), scale(%d), index(0x%0lx), "
2890
"disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
2891
segbase, base, vie->scale, idx, vie->displacement,
2892
gla, gla2);
2893
return (-1);
2894
}
2895
2896
return (0);
2897
}
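
/*
 * Worked example (hypothetical values): for an access through %ds with
 * base = 0x1000, scale = 4, index = 0x10, displacement = 8 and a zero
 * segment base, the check above recomputes
 *
 *	gla2 = 0 + 0x1000 + 4 * 0x10 + 8 = 0x1048
 *
 * which must match the 'gla' reported with the nested page table fault
 * once gla2 has been truncated to the instruction's address size.
 */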
2898
#endif /* _KERNEL */
2899
2900
int
2901
#ifdef _KERNEL
2902
vmm_decode_instruction(struct vcpu *vcpu, uint64_t gla,
2903
enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
2904
#else
2905
vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
2906
#endif
2907
{
2908
2909
if (decode_prefixes(vie, cpu_mode, cs_d))
2910
return (-1);
2911
2912
if (decode_opcode(vie))
2913
return (-1);
2914
2915
if (decode_modrm(vie, cpu_mode))
2916
return (-1);
2917
2918
if (decode_sib(vie))
2919
return (-1);
2920
2921
if (decode_displacement(vie))
2922
return (-1);
2923
2924
if (decode_immediate(vie))
2925
return (-1);
2926
2927
if (decode_moffset(vie))
2928
return (-1);
2929
2930
#ifdef _KERNEL
2931
if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
2932
if (verify_gla(vcpu, gla, vie, cpu_mode))
2933
return (-1);
2934
}
2935
#endif
2936
2937
vie->decoded = 1; /* success */
2938
2939
return (0);
2940
}
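
/*
 * A minimal decode sketch using the userspace (!_KERNEL) entry point;
 * 'example_decode' is a hypothetical caller, not part of vmm, and the
 * instruction bytes are an arbitrary 64-bit mov %rcx,(%rax).
 */
#if 0
static int
example_decode(void)
{
	const uint8_t bytes[] = { 0x48, 0x89, 0x08 };	/* mov %rcx,(%rax) */
	struct vie vie;

	vie_init(&vie, (const char *)bytes, sizeof(bytes));
	if (vmm_decode_instruction(CPU_MODE_64BIT, 0, &vie) != 0)
		return (-1);

	/* vie.op, vie.mod, vie.reg and vie.rm now describe the MOV. */
	return (0);
}
#endif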
2941
2942