Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
4561 views
1
/**************************************************************************
2
*
3
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice shall be included
13
* in all copies or substantial portions of the Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
* OTHER DEALINGS IN THE SOFTWARE.
22
*
23
**************************************************************************/
24
25
#include "pipe/p_config.h"
26
#include "util/u_cpu_detect.h"
27
28
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
29
30
#include "pipe/p_compiler.h"
31
#include "util/u_debug.h"
32
#include "util/u_pointer.h"
33
34
#include "rtasm_execmem.h"
35
#include "rtasm_x86sse.h"
36
37
#define DISASSEM 0
38
#define X86_TWOB 0x0f
39
40
41
#define DUMP_SSE 0
42
43
44
void x86_print_reg( struct x86_reg reg )
45
{
46
if (reg.mod != mod_REG)
47
debug_printf( "[" );
48
49
switch( reg.file ) {
50
case file_REG32:
51
switch( reg.idx ) {
52
case reg_AX: debug_printf( "EAX" ); break;
53
case reg_CX: debug_printf( "ECX" ); break;
54
case reg_DX: debug_printf( "EDX" ); break;
55
case reg_BX: debug_printf( "EBX" ); break;
56
case reg_SP: debug_printf( "ESP" ); break;
57
case reg_BP: debug_printf( "EBP" ); break;
58
case reg_SI: debug_printf( "ESI" ); break;
59
case reg_DI: debug_printf( "EDI" ); break;
60
}
61
break;
62
case file_MMX:
63
debug_printf( "MMX%u", reg.idx );
64
break;
65
case file_XMM:
66
debug_printf( "XMM%u", reg.idx );
67
break;
68
case file_x87:
69
debug_printf( "fp%u", reg.idx );
70
break;
71
}
72
73
if (reg.mod == mod_DISP8 ||
74
reg.mod == mod_DISP32)
75
debug_printf("+%d", reg.disp);
76
77
if (reg.mod != mod_REG)
78
debug_printf( "]" );
79
}
80
81
#if DUMP_SSE

/*
 * Instruction tracing helpers, only active when DUMP_SSE is non-zero.
 *
 * DUMP() prints the current code offset and the name of the calling
 * emitter (the part of __FUNCTION__ after the first '_'); the other
 * variants append their operands.  All of them expect a
 * "struct x86_function *p" to be in scope at the point of use.
 *
 * Values handed to "%x"/"%u" are cast to unsigned: the code offset is a
 * pointer difference and the immediates may be signed ints or enums, none
 * of which match those conversions on their own.
 */
#define DUMP_START() debug_printf( "\n" )
#define DUMP_END() debug_printf( "\n" )

#define DUMP() do {                                                     \
   const char *foo = __FUNCTION__;                                      \
   while (*foo && *foo != '_')                                          \
      foo++;                                                            \
   if (*foo)                                                            \
      foo++;                                                            \
   debug_printf( "\n%4x %14s ", (unsigned) (p->csr - p->store), foo );  \
} while (0)

#define DUMP_I( I ) do {                        \
   DUMP();                                      \
   debug_printf( "%u", (unsigned) (I) );        \
} while( 0 )

#define DUMP_R( R0 ) do {                       \
   DUMP();                                      \
   x86_print_reg( R0 );                         \
} while( 0 )

#define DUMP_RR( R0, R1 ) do {                  \
   DUMP();                                      \
   x86_print_reg( R0 );                         \
   debug_printf( ", " );                        \
   x86_print_reg( R1 );                         \
} while( 0 )

#define DUMP_RI( R0, I ) do {                   \
   DUMP();                                      \
   x86_print_reg( R0 );                         \
   debug_printf( ", %u", (unsigned) (I) );      \
} while( 0 )

#define DUMP_RRI( R0, R1, I ) do {              \
   DUMP();                                      \
   x86_print_reg( R0 );                         \
   debug_printf( ", " );                        \
   x86_print_reg( R1 );                         \
   debug_printf( ", %u", (unsigned) (I) );      \
} while( 0 )

#else

/* Tracing disabled: every helper expands to nothing. */
#define DUMP_START()
#define DUMP_END()
#define DUMP( )
#define DUMP_I( I )
#define DUMP_R( R0 )
#define DUMP_RR( R0, R1 )
#define DUMP_RI( R0, I )
#define DUMP_RRI( R0, R1, I )

#endif
138
139
140
static void do_realloc( struct x86_function *p )
141
{
142
if (p->store == p->error_overflow) {
143
p->csr = p->store;
144
}
145
else if (p->size == 0) {
146
p->size = 1024;
147
p->store = rtasm_exec_malloc(p->size);
148
p->csr = p->store;
149
}
150
else {
151
uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store );
152
unsigned char *tmp = p->store;
153
p->size *= 2;
154
p->store = rtasm_exec_malloc(p->size);
155
156
if (p->store) {
157
memcpy(p->store, tmp, used);
158
p->csr = p->store + used;
159
}
160
else {
161
p->csr = p->store;
162
}
163
164
rtasm_exec_free(tmp);
165
}
166
167
if (p->store == NULL) {
168
p->store = p->csr = p->error_overflow;
169
p->size = sizeof(p->error_overflow);
170
}
171
}
172
173
/* Emit bytes to the instruction stream:
174
*/
175
static unsigned char *reserve( struct x86_function *p, int bytes )
176
{
177
if (p->csr + bytes - p->store > (int) p->size)
178
do_realloc(p);
179
180
{
181
unsigned char *csr = p->csr;
182
p->csr += bytes;
183
return csr;
184
}
185
}
186
187
188
189
/** Append one (signed) byte to the instruction stream. */
static void emit_1b( struct x86_function *p, char b0 )
{
   *(char *) reserve(p, 1) = b0;
}
194
195
/**
 * Append an int (in native byte order) to the instruction stream.
 *
 * The destination is an arbitrary byte offset inside the code buffer, so
 * the value is copied with memcpy() instead of being stored through an
 * int pointer, which would be a misaligned, type-punned access.
 */
static void emit_1i( struct x86_function *p, int i0 )
{
   unsigned char *csr = reserve(p, sizeof(i0));
   memcpy(csr, &i0, sizeof(i0));
}
200
201
/** Append one unsigned byte to the instruction stream. */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *out = reserve(p, 1);
   out[0] = b0;
}
206
207
/** Append two unsigned bytes, b0 first, to the instruction stream. */
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *out = reserve(p, 2);
   out[0] = b0;
   out[1] = b1;
}
213
214
/** Append three unsigned bytes, in argument order, to the instruction stream. */
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *out = reserve(p, 3);
   out[0] = b0;
   out[1] = b1;
   out[2] = b2;
}
221
222
223
/* Build a modRM byte + possible displacement. No treatment of SIB
224
* indexing. BZZT - no way to encode an absolute address.
225
*
226
* This is the "/r" field in the x86 manuals...
227
*/
228
static void emit_modrm( struct x86_function *p,
229
struct x86_reg reg,
230
struct x86_reg regmem )
231
{
232
unsigned char val = 0;
233
234
assert(reg.mod == mod_REG);
235
236
/* TODO: support extended x86-64 registers */
237
assert(reg.idx < 8);
238
assert(regmem.idx < 8);
239
240
val |= regmem.mod << 6; /* mod field */
241
val |= reg.idx << 3; /* reg field */
242
val |= regmem.idx; /* r/m field */
243
244
emit_1ub(p, val);
245
246
/* Oh-oh we've stumbled into the SIB thing.
247
*/
248
if (regmem.file == file_REG32 &&
249
regmem.idx == reg_SP &&
250
regmem.mod != mod_REG) {
251
emit_1ub(p, 0x24); /* simplistic! */
252
}
253
254
switch (regmem.mod) {
255
case mod_REG:
256
case mod_INDIRECT:
257
break;
258
case mod_DISP8:
259
emit_1b(p, (char) regmem.disp);
260
break;
261
case mod_DISP32:
262
emit_1i(p, regmem.disp);
263
break;
264
default:
265
assert(0);
266
break;
267
}
268
}
269
270
/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes.
271
*/
272
static void emit_modrm_noreg( struct x86_function *p,
273
unsigned op,
274
struct x86_reg regmem )
275
{
276
struct x86_reg dummy = x86_make_reg(file_REG32, op);
277
emit_modrm(p, dummy, regmem);
278
}
279
280
/* Many x86 instructions have two opcodes to cope with the situations
281
* where the destination is a register or memory reference
282
* respectively. This function selects the correct opcode based on
283
* the arguments presented.
284
*/
285
static void emit_op_modrm( struct x86_function *p,
286
unsigned char op_dst_is_reg,
287
unsigned char op_dst_is_mem,
288
struct x86_reg dst,
289
struct x86_reg src )
290
{
291
switch (dst.mod) {
292
case mod_REG:
293
emit_1ub(p, op_dst_is_reg);
294
emit_modrm(p, dst, src);
295
break;
296
case mod_INDIRECT:
297
case mod_DISP32:
298
case mod_DISP8:
299
assert(src.mod == mod_REG);
300
emit_1ub(p, op_dst_is_mem);
301
emit_modrm(p, src, dst);
302
break;
303
default:
304
assert(0);
305
break;
306
}
307
}
308
309
310
311
312
313
314
315
/* Create and manipulate registers and regmem values:
316
*/
317
struct x86_reg x86_make_reg( enum x86_reg_file file,
318
enum x86_reg_name idx )
319
{
320
struct x86_reg reg;
321
322
reg.file = file;
323
reg.idx = idx;
324
reg.mod = mod_REG;
325
reg.disp = 0;
326
327
return reg;
328
}
329
330
struct x86_reg x86_make_disp( struct x86_reg reg,
331
int disp )
332
{
333
assert(reg.file == file_REG32);
334
335
if (reg.mod == mod_REG)
336
reg.disp = disp;
337
else
338
reg.disp += disp;
339
340
if (reg.disp == 0 && reg.idx != reg_BP)
341
reg.mod = mod_INDIRECT;
342
else if (reg.disp <= 127 && reg.disp >= -128)
343
reg.mod = mod_DISP8;
344
else
345
reg.mod = mod_DISP32;
346
347
return reg;
348
}
349
350
struct x86_reg x86_deref( struct x86_reg reg )
351
{
352
return x86_make_disp(reg, 0);
353
}
354
355
struct x86_reg x86_get_base_reg( struct x86_reg reg )
356
{
357
return x86_make_reg( reg.file, reg.idx );
358
}
359
360
int x86_get_label( struct x86_function *p )
361
{
362
return p->csr - p->store;
363
}
364
365
366
367
/***********************************************************************
368
* x86 instructions
369
*/
370
371
372
void x64_rexw(struct x86_function *p)
373
{
374
if(x86_target(p) != X86_32)
375
emit_1ub(p, 0x48);
376
}
377
378
void x86_jcc( struct x86_function *p,
379
enum x86_cc cc,
380
int label )
381
{
382
int offset = label - (x86_get_label(p) + 2);
383
DUMP_I(cc);
384
385
if (offset < 0) {
386
/*assert(p->csr - p->store > -offset);*/
387
if (p->csr - p->store <= -offset) {
388
/* probably out of memory (using the error_overflow buffer) */
389
return;
390
}
391
}
392
393
if (offset <= 127 && offset >= -128) {
394
emit_1ub(p, 0x70 + cc);
395
emit_1b(p, (char) offset);
396
}
397
else {
398
offset = label - (x86_get_label(p) + 6);
399
emit_2ub(p, 0x0f, 0x80 + cc);
400
emit_1i(p, offset);
401
}
402
}
403
404
/* Always use a 32bit offset for forward jumps:
405
*/
406
int x86_jcc_forward( struct x86_function *p,
407
enum x86_cc cc )
408
{
409
DUMP_I(cc);
410
emit_2ub(p, 0x0f, 0x80 + cc);
411
emit_1i(p, 0);
412
return x86_get_label(p);
413
}
414
415
/**
 * Emit an unconditional near jump (E9 rel32) with a zero placeholder
 * displacement.  Returns the label just past the instruction for use with
 * x86_fixup_fwd_jump().
 */
int x86_jmp_forward( struct x86_function *p)
{
   int fixup;

   DUMP();
   emit_1ub(p, 0xe9);
   emit_1i(p, 0);

   fixup = x86_get_label(p);
   return fixup;
}
422
423
/**
 * Emit a near call (E8 rel32) with a zero placeholder displacement.
 * Returns the label just past the instruction for use with
 * x86_fixup_fwd_jump().
 */
int x86_call_forward( struct x86_function *p)
{
   int fixup;

   DUMP();

   emit_1ub(p, 0xe8);
   emit_1i(p, 0);

   fixup = x86_get_label(p);
   return fixup;
}
431
432
/* Fixup offset from forward jump:
433
*/
434
void x86_fixup_fwd_jump( struct x86_function *p,
435
int fixup )
436
{
437
*(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup;
438
}
439
440
/**
 * Emit an unconditional near jump (E9 rel32) to an already known
 * \p label.
 */
void x86_jmp( struct x86_function *p, int label)
{
   int rel;

   DUMP_I( label );
   emit_1ub(p, 0xe9);

   /* Relative to the end of the 4-byte displacement about to be written. */
   rel = label - x86_get_label(p) - 4;
   emit_1i(p, rel);
}
446
447
/** Emit an indirect call through \p reg (FF /2). */
void
x86_call(struct x86_function *p, struct x86_reg reg)
{
   DUMP_R( reg );
   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, 2, reg);
}
453
454
455
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
456
{
457
DUMP_RI( dst, imm );
458
assert(dst.file == file_REG32);
459
assert(dst.mod == mod_REG);
460
emit_1ub(p, 0xb8 + dst.idx);
461
emit_1i(p, imm);
462
}
463
464
void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
465
{
466
DUMP_RI( dst, imm );
467
if(dst.mod == mod_REG)
468
x86_mov_reg_imm(p, dst, imm);
469
else
470
{
471
emit_1ub(p, 0xc7);
472
emit_modrm_noreg(p, 0, dst);
473
emit_1i(p, imm);
474
}
475
}
476
477
void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
478
{
479
DUMP_RI( dst, imm );
480
emit_1ub(p, 0x66);
481
if(dst.mod == mod_REG)
482
{
483
emit_1ub(p, 0xb8 + dst.idx);
484
emit_2ub(p, imm & 0xff, imm >> 8);
485
}
486
else
487
{
488
emit_1ub(p, 0xc7);
489
emit_modrm_noreg(p, 0, dst);
490
emit_2ub(p, imm & 0xff, imm >> 8);
491
}
492
}
493
494
void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
495
{
496
DUMP_RI( dst, imm );
497
if(dst.mod == mod_REG)
498
{
499
emit_1ub(p, 0xb0 + dst.idx);
500
emit_1ub(p, imm);
501
}
502
else
503
{
504
emit_1ub(p, 0xc6);
505
emit_modrm_noreg(p, 0, dst);
506
emit_1ub(p, imm);
507
}
508
}
509
510
/**
511
* Immediate group 1 instructions.
512
*/
513
static inline void
514
x86_group1_imm( struct x86_function *p,
515
unsigned op, struct x86_reg dst, int imm )
516
{
517
assert(dst.file == file_REG32);
518
assert(dst.mod == mod_REG);
519
if(-0x80 <= imm && imm < 0x80) {
520
emit_1ub(p, 0x83);
521
emit_modrm_noreg(p, op, dst);
522
emit_1b(p, (char)imm);
523
}
524
else {
525
emit_1ub(p, 0x81);
526
emit_modrm_noreg(p, op, dst);
527
emit_1i(p, imm);
528
}
529
}
530
531
/** ADD dst, imm: group 1 immediate with opcode extension /0. */
void
x86_add_imm(struct x86_function *p, struct x86_reg dst, int imm)
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 0, dst, imm);
}
536
537
/** OR dst, imm: group 1 immediate with opcode extension /1. */
void
x86_or_imm(struct x86_function *p, struct x86_reg dst, int imm)
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 1, dst, imm);
}
542
543
/** AND dst, imm: group 1 immediate with opcode extension /4. */
void
x86_and_imm(struct x86_function *p, struct x86_reg dst, int imm)
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 4, dst, imm);
}
548
549
/** SUB dst, imm: group 1 immediate with opcode extension /5. */
void
x86_sub_imm(struct x86_function *p, struct x86_reg dst, int imm)
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 5, dst, imm);
}
554
555
/** XOR dst, imm: group 1 immediate with opcode extension /6. */
void
x86_xor_imm(struct x86_function *p, struct x86_reg dst, int imm)
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 6, dst, imm);
}
560
561
/** CMP dst, imm: group 1 immediate with opcode extension /7. */
void
x86_cmp_imm(struct x86_function *p, struct x86_reg dst, int imm)
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 7, dst, imm);
}
566
567
568
void x86_push( struct x86_function *p,
569
struct x86_reg reg )
570
{
571
DUMP_R( reg );
572
if (reg.mod == mod_REG)
573
emit_1ub(p, 0x50 + reg.idx);
574
else
575
{
576
emit_1ub(p, 0xff);
577
emit_modrm_noreg(p, 6, reg);
578
}
579
580
581
p->stack_offset += sizeof(void*);
582
}
583
584
void x86_push_imm32( struct x86_function *p,
585
int imm32 )
586
{
587
DUMP_I( imm32 );
588
emit_1ub(p, 0x68);
589
emit_1i(p, imm32);
590
591
p->stack_offset += sizeof(void*);
592
}
593
594
595
void x86_pop( struct x86_function *p,
596
struct x86_reg reg )
597
{
598
DUMP_R( reg );
599
assert(reg.mod == mod_REG);
600
emit_1ub(p, 0x58 + reg.idx);
601
p->stack_offset -= sizeof(void*);
602
}
603
604
void x86_inc( struct x86_function *p,
605
struct x86_reg reg )
606
{
607
DUMP_R( reg );
608
if(x86_target(p) == X86_32 && reg.mod == mod_REG)
609
{
610
emit_1ub(p, 0x40 + reg.idx);
611
return;
612
}
613
emit_1ub(p, 0xff);
614
emit_modrm_noreg(p, 0, reg);
615
}
616
617
void x86_dec( struct x86_function *p,
618
struct x86_reg reg )
619
{
620
DUMP_R( reg );
621
if(x86_target(p) == X86_32 && reg.mod == mod_REG)
622
{
623
emit_1ub(p, 0x48 + reg.idx);
624
return;
625
}
626
emit_1ub(p, 0xff);
627
emit_modrm_noreg(p, 1, reg);
628
}
629
630
void x86_ret( struct x86_function *p )
631
{
632
DUMP();
633
assert(p->stack_offset == 0);
634
emit_1ub(p, 0xc3);
635
}
636
637
/**
 * Emit RET imm16 (C2 iw); the 16-bit immediate is written low byte first.
 */
void
x86_retw(struct x86_function *p, unsigned short imm)
{
   const unsigned char lo = imm & 0xff;
   const unsigned char hi = (imm >> 8) & 0xff;

   DUMP();
   emit_3ub(p, 0xc2, lo, hi);
}
642
643
/** Emit SAHF (9E). */
void
x86_sahf(struct x86_function *p)
{
   DUMP();
   emit_1ub(p, 0x9e);
}
648
649
void x86_mov( struct x86_function *p,
650
struct x86_reg dst,
651
struct x86_reg src )
652
{
653
DUMP_RR( dst, src );
654
/* special hack for reading arguments until we support x86-64 registers everywhere */
655
if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
656
{
657
uint8_t rex = 0x40;
658
if(dst.idx >= 8)
659
{
660
rex |= 4;
661
dst.idx -= 8;
662
}
663
if(src.idx >= 8)
664
{
665
rex |= 1;
666
src.idx -= 8;
667
}
668
emit_1ub(p, rex);
669
}
670
emit_op_modrm( p, 0x8b, 0x89, dst, src );
671
}
672
673
void x86_mov16( struct x86_function *p,
674
struct x86_reg dst,
675
struct x86_reg src )
676
{
677
DUMP_RR( dst, src );
678
emit_1ub(p, 0x66);
679
emit_op_modrm( p, 0x8b, 0x89, dst, src );
680
}
681
682
void x86_mov8( struct x86_function *p,
683
struct x86_reg dst,
684
struct x86_reg src )
685
{
686
DUMP_RR( dst, src );
687
emit_op_modrm( p, 0x8a, 0x88, dst, src );
688
}
689
690
void x64_mov64( struct x86_function *p,
691
struct x86_reg dst,
692
struct x86_reg src )
693
{
694
uint8_t rex = 0x48;
695
DUMP_RR( dst, src );
696
assert(x86_target(p) != X86_32);
697
698
/* special hack for reading arguments until we support x86-64 registers everywhere */
699
if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
700
{
701
if(dst.idx >= 8)
702
{
703
rex |= 4;
704
dst.idx -= 8;
705
}
706
if(src.idx >= 8)
707
{
708
rex |= 1;
709
src.idx -= 8;
710
}
711
}
712
emit_1ub(p, rex);
713
emit_op_modrm( p, 0x8b, 0x89, dst, src );
714
}
715
716
/** Emit MOVZX dst, src with an 8-bit source (0F B6 /r). */
void
x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR( dst, src );
   emit_2ub(p, 0x0f, 0xb6);
   emit_modrm(p, dst, src);
}
722
723
/** Emit MOVZX dst, src with a 16-bit source (0F B7 /r). */
void
x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR( dst, src );
   emit_2ub(p, 0x0f, 0xb7);
   emit_modrm(p, dst, src);
}
729
730
void x86_cmovcc( struct x86_function *p,
731
struct x86_reg dst,
732
struct x86_reg src,
733
enum x86_cc cc)
734
{
735
DUMP_RRI( dst, src, cc );
736
emit_2ub( p, 0x0f, 0x40 + cc );
737
emit_modrm( p, dst, src );
738
}
739
740
void x86_xor( struct x86_function *p,
741
struct x86_reg dst,
742
struct x86_reg src )
743
{
744
DUMP_RR( dst, src );
745
emit_op_modrm( p, 0x33, 0x31, dst, src );
746
}
747
748
void x86_cmp( struct x86_function *p,
749
struct x86_reg dst,
750
struct x86_reg src )
751
{
752
DUMP_RR( dst, src );
753
emit_op_modrm( p, 0x3b, 0x39, dst, src );
754
}
755
756
void x86_lea( struct x86_function *p,
757
struct x86_reg dst,
758
struct x86_reg src )
759
{
760
DUMP_RR( dst, src );
761
emit_1ub(p, 0x8d);
762
emit_modrm( p, dst, src );
763
}
764
765
void x86_test( struct x86_function *p,
766
struct x86_reg dst,
767
struct x86_reg src )
768
{
769
DUMP_RR( dst, src );
770
emit_1ub(p, 0x85);
771
emit_modrm( p, dst, src );
772
}
773
774
void x86_add( struct x86_function *p,
775
struct x86_reg dst,
776
struct x86_reg src )
777
{
778
DUMP_RR( dst, src );
779
emit_op_modrm(p, 0x03, 0x01, dst, src );
780
}
781
782
/* Calculate EAX * src, results in EDX:EAX.
783
*/
784
void x86_mul( struct x86_function *p,
785
struct x86_reg src )
786
{
787
DUMP_R( src );
788
emit_1ub(p, 0xf7);
789
emit_modrm_noreg(p, 4, src );
790
}
791
792
793
void x86_imul( struct x86_function *p,
794
struct x86_reg dst,
795
struct x86_reg src )
796
{
797
DUMP_RR( dst, src );
798
emit_2ub(p, X86_TWOB, 0xAF);
799
emit_modrm(p, dst, src);
800
}
801
802
803
void x86_sub( struct x86_function *p,
804
struct x86_reg dst,
805
struct x86_reg src )
806
{
807
DUMP_RR( dst, src );
808
emit_op_modrm(p, 0x2b, 0x29, dst, src );
809
}
810
811
void x86_or( struct x86_function *p,
812
struct x86_reg dst,
813
struct x86_reg src )
814
{
815
DUMP_RR( dst, src );
816
emit_op_modrm( p, 0x0b, 0x09, dst, src );
817
}
818
819
void x86_and( struct x86_function *p,
820
struct x86_reg dst,
821
struct x86_reg src )
822
{
823
DUMP_RR( dst, src );
824
emit_op_modrm( p, 0x23, 0x21, dst, src );
825
}
826
827
void x86_div( struct x86_function *p,
828
struct x86_reg src )
829
{
830
assert(src.file == file_REG32 && src.mod == mod_REG);
831
emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
832
}
833
834
void x86_bswap( struct x86_function *p, struct x86_reg reg )
835
{
836
DUMP_R(reg);
837
assert(reg.file == file_REG32);
838
assert(reg.mod == mod_REG);
839
emit_2ub(p, 0x0f, 0xc8 + reg.idx);
840
}
841
842
/**
 * Emit SHR reg, imm.  A count of 1 uses the short form D1 /5; any other
 * count uses C1 /5 ib.
 */
void
x86_shr_imm(struct x86_function *p, struct x86_reg reg, unsigned imm)
{
   const int by_one = (imm == 1);

   DUMP_RI(reg, imm);

   emit_1ub(p, by_one ? 0xd1 : 0xc1);
   emit_modrm_noreg(p, 5, reg);
   if (!by_one)
      emit_1ub(p, imm);
}
857
858
/**
 * Emit SAR reg, imm.  A count of 1 uses the short form D1 /7; any other
 * count uses C1 /7 ib.
 */
void
x86_sar_imm(struct x86_function *p, struct x86_reg reg, unsigned imm)
{
   const int by_one = (imm == 1);

   DUMP_RI(reg, imm);

   emit_1ub(p, by_one ? 0xd1 : 0xc1);
   emit_modrm_noreg(p, 7, reg);
   if (!by_one)
      emit_1ub(p, imm);
}
873
874
/**
 * Emit SHL reg, imm.  A count of 1 uses the short form D1 /4; any other
 * count uses C1 /4 ib.
 */
void
x86_shl_imm(struct x86_function *p, struct x86_reg reg, unsigned imm)
{
   const int by_one = (imm == 1);

   DUMP_RI(reg, imm);

   emit_1ub(p, by_one ? 0xd1 : 0xc1);
   emit_modrm_noreg(p, 4, reg);
   if (!by_one)
      emit_1ub(p, imm);
}
889
890
891
/***********************************************************************
892
* SSE instructions
893
*/
894
895
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr)
896
{
897
DUMP_R( ptr );
898
assert(ptr.mod != mod_REG);
899
emit_2ub(p, 0x0f, 0x18);
900
emit_modrm_noreg(p, 0, ptr);
901
}
902
903
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr)
904
{
905
DUMP_R( ptr );
906
assert(ptr.mod != mod_REG);
907
emit_2ub(p, 0x0f, 0x18);
908
emit_modrm_noreg(p, 1, ptr);
909
}
910
911
void sse_prefetch1( struct x86_function *p, struct x86_reg ptr)
912
{
913
DUMP_R( ptr );
914
assert(ptr.mod != mod_REG);
915
emit_2ub(p, 0x0f, 0x18);
916
emit_modrm_noreg(p, 2, ptr);
917
}
918
919
void sse_movntps( struct x86_function *p,
920
struct x86_reg dst,
921
struct x86_reg src)
922
{
923
DUMP_RR( dst, src );
924
925
assert(dst.mod != mod_REG);
926
assert(src.mod == mod_REG);
927
emit_2ub(p, 0x0f, 0x2b);
928
emit_modrm(p, src, dst);
929
}
930
931
932
933
934
void sse_movss( struct x86_function *p,
935
struct x86_reg dst,
936
struct x86_reg src )
937
{
938
DUMP_RR( dst, src );
939
emit_2ub(p, 0xF3, X86_TWOB);
940
emit_op_modrm( p, 0x10, 0x11, dst, src );
941
}
942
943
void sse_movaps( struct x86_function *p,
944
struct x86_reg dst,
945
struct x86_reg src )
946
{
947
DUMP_RR( dst, src );
948
emit_1ub(p, X86_TWOB);
949
emit_op_modrm( p, 0x28, 0x29, dst, src );
950
}
951
952
void sse_movups( struct x86_function *p,
953
struct x86_reg dst,
954
struct x86_reg src )
955
{
956
DUMP_RR( dst, src );
957
emit_1ub(p, X86_TWOB);
958
emit_op_modrm( p, 0x10, 0x11, dst, src );
959
}
960
961
void sse_movhps( struct x86_function *p,
962
struct x86_reg dst,
963
struct x86_reg src )
964
{
965
DUMP_RR( dst, src );
966
assert(dst.mod != mod_REG || src.mod != mod_REG);
967
emit_1ub(p, X86_TWOB);
968
emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
969
}
970
971
void sse_movlps( struct x86_function *p,
972
struct x86_reg dst,
973
struct x86_reg src )
974
{
975
DUMP_RR( dst, src );
976
assert(dst.mod != mod_REG || src.mod != mod_REG);
977
emit_1ub(p, X86_TWOB);
978
emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
979
}
980
981
void sse_maxps( struct x86_function *p,
982
struct x86_reg dst,
983
struct x86_reg src )
984
{
985
DUMP_RR( dst, src );
986
emit_2ub(p, X86_TWOB, 0x5F);
987
emit_modrm( p, dst, src );
988
}
989
990
void sse_maxss( struct x86_function *p,
991
struct x86_reg dst,
992
struct x86_reg src )
993
{
994
DUMP_RR( dst, src );
995
emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
996
emit_modrm( p, dst, src );
997
}
998
999
void sse_divss( struct x86_function *p,
1000
struct x86_reg dst,
1001
struct x86_reg src )
1002
{
1003
DUMP_RR( dst, src );
1004
emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
1005
emit_modrm( p, dst, src );
1006
}
1007
1008
void sse_minps( struct x86_function *p,
1009
struct x86_reg dst,
1010
struct x86_reg src )
1011
{
1012
DUMP_RR( dst, src );
1013
emit_2ub(p, X86_TWOB, 0x5D);
1014
emit_modrm( p, dst, src );
1015
}
1016
1017
void sse_subps( struct x86_function *p,
1018
struct x86_reg dst,
1019
struct x86_reg src )
1020
{
1021
DUMP_RR( dst, src );
1022
emit_2ub(p, X86_TWOB, 0x5C);
1023
emit_modrm( p, dst, src );
1024
}
1025
1026
void sse_mulps( struct x86_function *p,
1027
struct x86_reg dst,
1028
struct x86_reg src )
1029
{
1030
DUMP_RR( dst, src );
1031
emit_2ub(p, X86_TWOB, 0x59);
1032
emit_modrm( p, dst, src );
1033
}
1034
1035
void sse_mulss( struct x86_function *p,
1036
struct x86_reg dst,
1037
struct x86_reg src )
1038
{
1039
DUMP_RR( dst, src );
1040
emit_3ub(p, 0xF3, X86_TWOB, 0x59);
1041
emit_modrm( p, dst, src );
1042
}
1043
1044
void sse_addps( struct x86_function *p,
1045
struct x86_reg dst,
1046
struct x86_reg src )
1047
{
1048
DUMP_RR( dst, src );
1049
emit_2ub(p, X86_TWOB, 0x58);
1050
emit_modrm( p, dst, src );
1051
}
1052
1053
void sse_addss( struct x86_function *p,
1054
struct x86_reg dst,
1055
struct x86_reg src )
1056
{
1057
DUMP_RR( dst, src );
1058
emit_3ub(p, 0xF3, X86_TWOB, 0x58);
1059
emit_modrm( p, dst, src );
1060
}
1061
1062
void sse_andnps( struct x86_function *p,
1063
struct x86_reg dst,
1064
struct x86_reg src )
1065
{
1066
DUMP_RR( dst, src );
1067
emit_2ub(p, X86_TWOB, 0x55);
1068
emit_modrm( p, dst, src );
1069
}
1070
1071
void sse_andps( struct x86_function *p,
1072
struct x86_reg dst,
1073
struct x86_reg src )
1074
{
1075
DUMP_RR( dst, src );
1076
emit_2ub(p, X86_TWOB, 0x54);
1077
emit_modrm( p, dst, src );
1078
}
1079
1080
void sse_rsqrtps( struct x86_function *p,
1081
struct x86_reg dst,
1082
struct x86_reg src )
1083
{
1084
DUMP_RR( dst, src );
1085
emit_2ub(p, X86_TWOB, 0x52);
1086
emit_modrm( p, dst, src );
1087
}
1088
1089
void sse_rsqrtss( struct x86_function *p,
1090
struct x86_reg dst,
1091
struct x86_reg src )
1092
{
1093
DUMP_RR( dst, src );
1094
emit_3ub(p, 0xF3, X86_TWOB, 0x52);
1095
emit_modrm( p, dst, src );
1096
1097
}
1098
1099
void sse_movhlps( struct x86_function *p,
1100
struct x86_reg dst,
1101
struct x86_reg src )
1102
{
1103
DUMP_RR( dst, src );
1104
assert(dst.mod == mod_REG && src.mod == mod_REG);
1105
emit_2ub(p, X86_TWOB, 0x12);
1106
emit_modrm( p, dst, src );
1107
}
1108
1109
void sse_movlhps( struct x86_function *p,
1110
struct x86_reg dst,
1111
struct x86_reg src )
1112
{
1113
DUMP_RR( dst, src );
1114
assert(dst.mod == mod_REG && src.mod == mod_REG);
1115
emit_2ub(p, X86_TWOB, 0x16);
1116
emit_modrm( p, dst, src );
1117
}
1118
1119
void sse_orps( struct x86_function *p,
1120
struct x86_reg dst,
1121
struct x86_reg src )
1122
{
1123
DUMP_RR( dst, src );
1124
emit_2ub(p, X86_TWOB, 0x56);
1125
emit_modrm( p, dst, src );
1126
}
1127
1128
void sse_xorps( struct x86_function *p,
1129
struct x86_reg dst,
1130
struct x86_reg src )
1131
{
1132
DUMP_RR( dst, src );
1133
emit_2ub(p, X86_TWOB, 0x57);
1134
emit_modrm( p, dst, src );
1135
}
1136
1137
void sse_cvtps2pi( struct x86_function *p,
1138
struct x86_reg dst,
1139
struct x86_reg src )
1140
{
1141
DUMP_RR( dst, src );
1142
assert(dst.file == file_MMX &&
1143
(src.file == file_XMM || src.mod != mod_REG));
1144
1145
p->need_emms = 1;
1146
1147
emit_2ub(p, X86_TWOB, 0x2d);
1148
emit_modrm( p, dst, src );
1149
}
1150
1151
void sse2_cvtdq2ps( struct x86_function *p,
1152
struct x86_reg dst,
1153
struct x86_reg src )
1154
{
1155
DUMP_RR( dst, src );
1156
emit_2ub(p, X86_TWOB, 0x5b);
1157
emit_modrm( p, dst, src );
1158
}
1159
1160
1161
/* Shufps can also be used to implement a reduced swizzle when dest ==
1162
* arg0.
1163
*/
1164
void sse_shufps( struct x86_function *p,
1165
struct x86_reg dst,
1166
struct x86_reg src,
1167
unsigned char shuf)
1168
{
1169
DUMP_RRI( dst, src, shuf );
1170
emit_2ub(p, X86_TWOB, 0xC6);
1171
emit_modrm(p, dst, src);
1172
emit_1ub(p, shuf);
1173
}
1174
1175
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1176
{
1177
DUMP_RR( dst, src );
1178
emit_2ub( p, X86_TWOB, 0x15 );
1179
emit_modrm( p, dst, src );
1180
}
1181
1182
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1183
{
1184
DUMP_RR( dst, src );
1185
emit_2ub( p, X86_TWOB, 0x14 );
1186
emit_modrm( p, dst, src );
1187
}
1188
1189
void sse_cmpps( struct x86_function *p,
1190
struct x86_reg dst,
1191
struct x86_reg src,
1192
enum sse_cc cc)
1193
{
1194
DUMP_RRI( dst, src, cc );
1195
emit_2ub(p, X86_TWOB, 0xC2);
1196
emit_modrm(p, dst, src);
1197
emit_1ub(p, cc);
1198
}
1199
1200
void sse_pmovmskb( struct x86_function *p,
1201
struct x86_reg dst,
1202
struct x86_reg src)
1203
{
1204
DUMP_RR( dst, src );
1205
emit_3ub(p, 0x66, X86_TWOB, 0xD7);
1206
emit_modrm(p, dst, src);
1207
}
1208
1209
void sse_movmskps( struct x86_function *p,
1210
struct x86_reg dst,
1211
struct x86_reg src)
1212
{
1213
DUMP_RR( dst, src );
1214
emit_2ub(p, X86_TWOB, 0x50);
1215
emit_modrm(p, dst, src);
1216
}
1217
1218
/***********************************************************************
1219
* SSE2 instructions
1220
*/
1221
1222
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1223
{
1224
DUMP_RR(dst, src);
1225
emit_2ub(p, 0x66, 0x0f);
1226
if(dst.mod == mod_REG && dst.file == file_REG32)
1227
{
1228
emit_1ub(p, 0x7e);
1229
emit_modrm(p, src, dst);
1230
}
1231
else
1232
{
1233
emit_op_modrm(p, 0x6e, 0x7e, dst, src);
1234
}
1235
}
1236
1237
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1238
{
1239
DUMP_RR(dst, src);
1240
switch (dst.mod) {
1241
case mod_REG:
1242
emit_3ub(p, 0xf3, 0x0f, 0x7e);
1243
emit_modrm(p, dst, src);
1244
break;
1245
case mod_INDIRECT:
1246
case mod_DISP32:
1247
case mod_DISP8:
1248
assert(src.mod == mod_REG);
1249
emit_3ub(p, 0x66, 0x0f, 0xd6);
1250
emit_modrm(p, src, dst);
1251
break;
1252
default:
1253
assert(0);
1254
break;
1255
}
1256
}
1257
1258
/**
 * Emit MOVDQU: F3 0F followed by 6F (register destination) or 7F (memory
 * destination).
 */
void
sse2_movdqu(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0xf3, 0x0f);
   emit_op_modrm(p, 0x6f, 0x7f, dst, src);
}
1264
1265
/**
 * Emit MOVDQA: 66 0F followed by 6F (register destination) or 7F (memory
 * destination).
 */
void
sse2_movdqa(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, 0x6f, 0x7f, dst, src);
}
1271
1272
/**
 * Emit MOVSD: F2 0F followed by 10 (register destination) or 11 (memory
 * destination).
 */
void
sse2_movsd(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0xf2, 0x0f);
   emit_op_modrm(p, 0x10, 0x11, dst, src);
}
1278
1279
/**
 * Emit MOVUPD: 66 0F followed by 10 (register destination) or 11 (memory
 * destination).
 */
void
sse2_movupd(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, 0x10, 0x11, dst, src);
}
1285
1286
/**
 * Emit MOVAPD: 66 0F followed by 28 (register destination) or 29 (memory
 * destination).
 */
void
sse2_movapd(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, 0x28, 0x29, dst, src);
}
1292
1293
/**
1294
* Perform a reduced swizzle:
1295
*/
1296
void sse2_pshufd( struct x86_function *p,
1297
struct x86_reg dst,
1298
struct x86_reg src,
1299
unsigned char shuf)
1300
{
1301
DUMP_RRI( dst, src, shuf );
1302
emit_3ub(p, 0x66, X86_TWOB, 0x70);
1303
emit_modrm(p, dst, src);
1304
emit_1ub(p, shuf);
1305
}
1306
1307
void sse2_pshuflw( struct x86_function *p,
1308
struct x86_reg dst,
1309
struct x86_reg src,
1310
unsigned char shuf)
1311
{
1312
DUMP_RRI( dst, src, shuf );
1313
emit_3ub(p, 0xf2, X86_TWOB, 0x70);
1314
emit_modrm(p, dst, src);
1315
emit_1ub(p, shuf);
1316
}
1317
1318
void sse2_pshufhw( struct x86_function *p,
1319
struct x86_reg dst,
1320
struct x86_reg src,
1321
unsigned char shuf)
1322
{
1323
DUMP_RRI( dst, src, shuf );
1324
emit_3ub(p, 0xf3, X86_TWOB, 0x70);
1325
emit_modrm(p, dst, src);
1326
emit_1ub(p, shuf);
1327
}
1328
1329
void sse2_cvttps2dq( struct x86_function *p,
1330
struct x86_reg dst,
1331
struct x86_reg src )
1332
{
1333
DUMP_RR( dst, src );
1334
emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
1335
emit_modrm( p, dst, src );
1336
}
1337
1338
void sse2_cvtps2dq( struct x86_function *p,
1339
struct x86_reg dst,
1340
struct x86_reg src )
1341
{
1342
DUMP_RR( dst, src );
1343
emit_3ub(p, 0x66, X86_TWOB, 0x5B);
1344
emit_modrm( p, dst, src );
1345
}
1346
1347
void sse2_cvtsd2ss( struct x86_function *p,
1348
struct x86_reg dst,
1349
struct x86_reg src )
1350
{
1351
DUMP_RR( dst, src );
1352
emit_3ub(p, 0xf2, 0x0f, 0x5a);
1353
emit_modrm( p, dst, src );
1354
}
1355
1356
void sse2_cvtpd2ps( struct x86_function *p,
1357
struct x86_reg dst,
1358
struct x86_reg src )
1359
{
1360
DUMP_RR( dst, src );
1361
emit_3ub(p, 0x66, 0x0f, 0x5a);
1362
emit_modrm( p, dst, src );
1363
}
1364
1365
void sse2_packssdw( struct x86_function *p,
1366
struct x86_reg dst,
1367
struct x86_reg src )
1368
{
1369
DUMP_RR( dst, src );
1370
emit_3ub(p, 0x66, X86_TWOB, 0x6B);
1371
emit_modrm( p, dst, src );
1372
}
1373
1374
void sse2_packsswb( struct x86_function *p,
1375
struct x86_reg dst,
1376
struct x86_reg src )
1377
{
1378
DUMP_RR( dst, src );
1379
emit_3ub(p, 0x66, X86_TWOB, 0x63);
1380
emit_modrm( p, dst, src );
1381
}
1382
1383
void sse2_packuswb( struct x86_function *p,
1384
struct x86_reg dst,
1385
struct x86_reg src )
1386
{
1387
DUMP_RR( dst, src );
1388
emit_3ub(p, 0x66, X86_TWOB, 0x67);
1389
emit_modrm( p, dst, src );
1390
}
1391
1392
void sse2_punpcklbw( struct x86_function *p,
1393
struct x86_reg dst,
1394
struct x86_reg src )
1395
{
1396
DUMP_RR( dst, src );
1397
emit_3ub(p, 0x66, X86_TWOB, 0x60);
1398
emit_modrm( p, dst, src );
1399
}
1400
1401
/**
 * Emit the SSE2 form of PUNPCKLWD (66 0F 61 /r).
 */
void sse2_punpcklwd(struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src)
{
   DUMP_RR(dst, src);

   emit_3ub(p, 0x66, 0x0f, 0x61);
   emit_modrm(p, dst, src);
}
1407
1408
/**
 * Emit the SSE2 form of PUNPCKLDQ (66 0F 62 /r).
 */
void sse2_punpckldq(struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src)
{
   DUMP_RR(dst, src);

   emit_3ub(p, 0x66, 0x0f, 0x62);
   emit_modrm(p, dst, src);
}
1414
1415
/**
 * Emit PUNPCKLQDQ (66 0F 6C /r).
 */
void sse2_punpcklqdq(struct x86_function *p,
                     struct x86_reg dst,
                     struct x86_reg src)
{
   DUMP_RR(dst, src);

   emit_3ub(p, 0x66, 0x0f, 0x6c);
   emit_modrm(p, dst, src);
}
1421
1422
/**
 * Emit PSLLW with an immediate shift count (66 0F 71 /6 ib).
 *
 * The value 6 goes in the ModRM reg field as the opcode extension; imm is
 * emitted as a single trailing byte.
 */
void sse2_psllw_imm(struct x86_function *p,
                    struct x86_reg dst,
                    unsigned imm)
{
   DUMP_RI(dst, imm);

   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}
1429
1430
/**
 * Emit PSLLD with an immediate shift count (66 0F 72 /6 ib).
 *
 * The value 6 goes in the ModRM reg field as the opcode extension; imm is
 * emitted as a single trailing byte.
 */
void sse2_pslld_imm(struct x86_function *p,
                    struct x86_reg dst,
                    unsigned imm)
{
   DUMP_RI(dst, imm);

   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}
1437
1438
/**
 * Emit PSLLQ with an immediate shift count (66 0F 73 /6 ib).
 *
 * The value 6 goes in the ModRM reg field as the opcode extension; imm is
 * emitted as a single trailing byte.
 */
void sse2_psllq_imm(struct x86_function *p,
                    struct x86_reg dst,
                    unsigned imm)
{
   DUMP_RI(dst, imm);

   emit_3ub(p, 0x66, 0x0f, 0x73);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}
1445
1446
/**
 * Emit PSRLW with an immediate shift count (66 0F 71 /2 ib).
 *
 * The value 2 goes in the ModRM reg field as the opcode extension; imm is
 * emitted as a single trailing byte.
 */
void sse2_psrlw_imm(struct x86_function *p,
                    struct x86_reg dst,
                    unsigned imm)
{
   DUMP_RI(dst, imm);

   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}
1453
1454
/**
 * Emit PSRLD with an immediate shift count (66 0F 72 /2 ib).
 *
 * The value 2 goes in the ModRM reg field as the opcode extension; imm is
 * emitted as a single trailing byte.
 */
void sse2_psrld_imm(struct x86_function *p,
                    struct x86_reg dst,
                    unsigned imm)
{
   DUMP_RI(dst, imm);

   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}
1461
1462
/**
 * Emit PSRLQ with an immediate shift count (66 0F 73 /2 ib).
 *
 * The value 2 goes in the ModRM reg field as the opcode extension; imm is
 * emitted as a single trailing byte.
 */
void sse2_psrlq_imm(struct x86_function *p,
                    struct x86_reg dst,
                    unsigned imm)
{
   DUMP_RI(dst, imm);

   emit_3ub(p, 0x66, 0x0f, 0x73);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}
1469
1470
/**
 * Emit PSRAW with an immediate shift count (66 0F 71 /4 ib).
 *
 * The value 4 goes in the ModRM reg field as the opcode extension; imm is
 * emitted as a single trailing byte.
 */
void sse2_psraw_imm(struct x86_function *p,
                    struct x86_reg dst,
                    unsigned imm)
{
   DUMP_RI(dst, imm);

   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 4, dst);
   emit_1ub(p, imm);
}
1477
1478
/**
 * Emit PSRAD with an immediate shift count (66 0F 72 /4 ib).
 *
 * The value 4 goes in the ModRM reg field as the opcode extension; imm is
 * emitted as a single trailing byte.
 */
void sse2_psrad_imm(struct x86_function *p,
                    struct x86_reg dst,
                    unsigned imm)
{
   DUMP_RI(dst, imm);

   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 4, dst);
   emit_1ub(p, imm);
}
1485
1486
/**
 * Emit the SSE2 form of POR (66 0F EB /r).
 */
void sse2_por(struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src)
{
   DUMP_RR(dst, src);

   emit_3ub(p, 0x66, 0x0f, 0xeb);
   emit_modrm(p, dst, src);
}
1492
1493
void sse2_rcpps( struct x86_function *p,
1494
struct x86_reg dst,
1495
struct x86_reg src )
1496
{
1497
DUMP_RR( dst, src );
1498
emit_2ub(p, X86_TWOB, 0x53);
1499
emit_modrm( p, dst, src );
1500
}
1501
1502
void sse2_rcpss( struct x86_function *p,
1503
struct x86_reg dst,
1504
struct x86_reg src )
1505
{
1506
DUMP_RR( dst, src );
1507
emit_3ub(p, 0xF3, X86_TWOB, 0x53);
1508
emit_modrm( p, dst, src );
1509
}
1510
1511
/***********************************************************************
1512
* x87 instructions
1513
*/
1514
static void note_x87_pop( struct x86_function *p )
1515
{
1516
p->x87_stack--;
1517
assert(p->x87_stack >= 0);
1518
}
1519
1520
static void note_x87_push( struct x86_function *p )
1521
{
1522
p->x87_stack++;
1523
assert(p->x87_stack <= 7);
1524
}
1525
1526
void x87_assert_stack_empty( struct x86_function *p )
1527
{
1528
assert (p->x87_stack == 0);
1529
}
1530
1531
1532
/**
 * Emit FIST (DB /2): opcode byte 0xdb with ModRM opcode extension 2 and
 * dst as the operand.  The tracked stack depth is unchanged.
 */
void x87_fist(struct x86_function *p, struct x86_reg dst)
{
   DUMP_R(dst);

   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 2, dst);
}
1538
1539
/**
 * Emit FISTP (DB /3): opcode byte 0xdb with ModRM opcode extension 3 and
 * dst as the operand, then record one pop of the x87 stack.
 */
void x87_fistp(struct x86_function *p, struct x86_reg dst)
{
   DUMP_R(dst);

   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 3, dst);

   note_x87_pop(p);
}
1546
1547
/**
 * Emit FILD: opcode byte 0xdf with ModRM opcode extension 0 and arg as the
 * operand, then record one push onto the x87 stack.
 *
 * NOTE(review): in the Intel encoding DF /0 is the 16-bit-integer form of
 * FILD, whereas x87_fist()/x87_fistp() in this file use the 0xdb (32-bit
 * integer) opcodes -- confirm against callers which operand size is
 * intended here.
 */
void x87_fild(struct x86_function *p, struct x86_reg arg)
{
   DUMP_R(arg);

   emit_1ub(p, 0xdf);
   emit_modrm_noreg(p, 0, arg);

   note_x87_push(p);
}
1554
1555
/**
 * Emit FLDZ (D9 EE) and record one push onto the x87 stack.
 */
void x87_fldz(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xee);

   note_x87_push(p);
}
1561
1562
1563
void x87_fldcw( struct x86_function *p, struct x86_reg arg )
1564
{
1565
DUMP_R( arg );
1566
assert(arg.file == file_REG32);
1567
assert(arg.mod != mod_REG);
1568
emit_1ub(p, 0xd9);
1569
emit_modrm_noreg(p, 5, arg);
1570
}
1571
1572
/**
 * Emit FLD1 (D9 E8) and record one push onto the x87 stack.
 */
void x87_fld1(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xe8);

   note_x87_push(p);
}
1578
1579
/**
 * Emit FLDL2E (D9 EA) and record one push onto the x87 stack.
 */
void x87_fldl2e(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xea);

   note_x87_push(p);
}
1585
1586
/**
 * Emit FLDLN2 (D9 ED) and record one push onto the x87 stack.
 */
void x87_fldln2(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xed);

   note_x87_push(p);
}
1592
1593
/**
 * Emit FWAIT/WAIT (single byte 0x9B).
 */
void x87_fwait(struct x86_function *p)
{
   DUMP();

   emit_1ub(p, 0x9b);
}
1598
1599
/**
 * Emit FNCLEX (DB E2).
 */
void x87_fnclex(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xdb, 0xe2);
}
1604
1605
/**
 * Emit FCLEX, built as x87_fwait() immediately followed by x87_fnclex().
 */
void x87_fclex(struct x86_function *p)
{
   x87_fwait(p);     /* 9B    */
   x87_fnclex(p);    /* DB E2 */
}
1610
1611
void x87_fcmovb( struct x86_function *p, struct x86_reg arg )
1612
{
1613
DUMP_R( arg );
1614
assert(arg.file == file_x87);
1615
emit_2ub(p, 0xda, 0xc0+arg.idx);
1616
}
1617
1618
void x87_fcmove( struct x86_function *p, struct x86_reg arg )
1619
{
1620
DUMP_R( arg );
1621
assert(arg.file == file_x87);
1622
emit_2ub(p, 0xda, 0xc8+arg.idx);
1623
}
1624
1625
void x87_fcmovbe( struct x86_function *p, struct x86_reg arg )
1626
{
1627
DUMP_R( arg );
1628
assert(arg.file == file_x87);
1629
emit_2ub(p, 0xda, 0xd0+arg.idx);
1630
}
1631
1632
void x87_fcmovnb( struct x86_function *p, struct x86_reg arg )
1633
{
1634
DUMP_R( arg );
1635
assert(arg.file == file_x87);
1636
emit_2ub(p, 0xdb, 0xc0+arg.idx);
1637
}
1638
1639
void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
1640
{
1641
DUMP_R( arg );
1642
assert(arg.file == file_x87);
1643
emit_2ub(p, 0xdb, 0xc8+arg.idx);
1644
}
1645
1646
void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
1647
{
1648
DUMP_R( arg );
1649
assert(arg.file == file_x87);
1650
emit_2ub(p, 0xdb, 0xd0+arg.idx);
1651
}
1652
1653
1654
1655
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
1656
unsigned char dst0ub0,
1657
unsigned char dst0ub1,
1658
unsigned char arg0ub0,
1659
unsigned char arg0ub1,
1660
unsigned char argmem_noreg)
1661
{
1662
assert(dst.file == file_x87);
1663
1664
if (arg.file == file_x87) {
1665
if (dst.idx == 0)
1666
emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
1667
else if (arg.idx == 0)
1668
emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
1669
else
1670
assert(0);
1671
}
1672
else if (dst.idx == 0) {
1673
assert(arg.file == file_REG32);
1674
emit_1ub(p, 0xd8);
1675
emit_modrm_noreg(p, argmem_noreg, arg);
1676
}
1677
else
1678
assert(0);
1679
}
1680
1681
/**
 * Emit FMUL.
 *
 * Encodings handed to x87_arith_op():
 *   - D8 C8+i   fmul st(0), st(i)
 *   - DC C8+i   fmul st(i), st(0)
 *   - D8 /1     fmul with a ModRM-encoded (non-x87) source
 *
 * The ModRM opcode extension for FMUL is 1; /4 is FSUB, so passing 4 here
 * would silently assemble a subtraction for memory sources.
 */
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xc8,
                0xdc, 0xc8,
                1);
}
1689
1690
/**
 * Emit FSUB.
 *
 * Encodings handed to x87_arith_op():
 *   - D8 E0+i   fsub st(0), st(i)
 *   - DC E8+i   fsub st(i), st(0)
 *   - D8 /4     fsub with a ModRM-encoded (non-x87) source
 */
void x87_fsub(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);

   x87_arith_op(p, dst, src, 0xd8, 0xe0, 0xdc, 0xe8, 4);
}
1698
1699
/**
 * Emit FSUBR.
 *
 * Encodings handed to x87_arith_op():
 *   - D8 E8+i   fsubr st(0), st(i)
 *   - DC E0+i   fsubr st(i), st(0)
 *   - D8 /5     fsubr with a ModRM-encoded (non-x87) source
 */
void x87_fsubr(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);

   x87_arith_op(p, dst, src, 0xd8, 0xe8, 0xdc, 0xe0, 5);
}
1707
1708
/**
 * Emit FADD.
 *
 * Encodings handed to x87_arith_op():
 *   - D8 C0+i   fadd st(0), st(i)
 *   - DC C0+i   fadd st(i), st(0)
 *   - D8 /0     fadd with a ModRM-encoded (non-x87) source
 */
void x87_fadd(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);

   x87_arith_op(p, dst, src, 0xd8, 0xc0, 0xdc, 0xc0, 0);
}
1716
1717
/**
 * Emit FDIV.
 *
 * Encodings handed to x87_arith_op():
 *   - D8 F0+i   fdiv st(0), st(i)
 *   - DC F8+i   fdiv st(i), st(0)
 *   - D8 /6     fdiv with a ModRM-encoded (non-x87) source
 */
void x87_fdiv(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);

   x87_arith_op(p, dst, src, 0xd8, 0xf0, 0xdc, 0xf8, 6);
}
1725
1726
/**
 * Emit FDIVR.
 *
 * Encodings handed to x87_arith_op():
 *   - D8 F8+i   fdivr st(0), st(i)
 *   - DC F0+i   fdivr st(i), st(0)
 *   - D8 /7     fdivr with a ModRM-encoded (non-x87) source
 */
void x87_fdivr(struct x86_function *p, struct x86_reg dst, struct x86_reg src)
{
   DUMP_RR(dst, src);

   x87_arith_op(p, dst, src, 0xd8, 0xf8, 0xdc, 0xf0, 7);
}
1734
1735
void x87_fmulp( struct x86_function *p, struct x86_reg dst )
1736
{
1737
DUMP_R( dst );
1738
assert(dst.file == file_x87);
1739
assert(dst.idx >= 1);
1740
emit_2ub(p, 0xde, 0xc8+dst.idx);
1741
note_x87_pop(p);
1742
}
1743
1744
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
1745
{
1746
DUMP_R( dst );
1747
assert(dst.file == file_x87);
1748
assert(dst.idx >= 1);
1749
emit_2ub(p, 0xde, 0xe8+dst.idx);
1750
note_x87_pop(p);
1751
}
1752
1753
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
1754
{
1755
DUMP_R( dst );
1756
assert(dst.file == file_x87);
1757
assert(dst.idx >= 1);
1758
emit_2ub(p, 0xde, 0xe0+dst.idx);
1759
note_x87_pop(p);
1760
}
1761
1762
void x87_faddp( struct x86_function *p, struct x86_reg dst )
1763
{
1764
DUMP_R( dst );
1765
assert(dst.file == file_x87);
1766
assert(dst.idx >= 1);
1767
emit_2ub(p, 0xde, 0xc0+dst.idx);
1768
note_x87_pop(p);
1769
}
1770
1771
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
1772
{
1773
DUMP_R( dst );
1774
assert(dst.file == file_x87);
1775
assert(dst.idx >= 1);
1776
emit_2ub(p, 0xde, 0xf8+dst.idx);
1777
note_x87_pop(p);
1778
}
1779
1780
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
1781
{
1782
DUMP_R( dst );
1783
assert(dst.file == file_x87);
1784
assert(dst.idx >= 1);
1785
emit_2ub(p, 0xde, 0xf0+dst.idx);
1786
note_x87_pop(p);
1787
}
1788
1789
/**
 * Emit FTST (D9 E4).
 */
void x87_ftst(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xe4);
}
1794
1795
void x87_fucom( struct x86_function *p, struct x86_reg arg )
1796
{
1797
DUMP_R( arg );
1798
assert(arg.file == file_x87);
1799
emit_2ub(p, 0xdd, 0xe0+arg.idx);
1800
}
1801
1802
void x87_fucomp( struct x86_function *p, struct x86_reg arg )
1803
{
1804
DUMP_R( arg );
1805
assert(arg.file == file_x87);
1806
emit_2ub(p, 0xdd, 0xe8+arg.idx);
1807
note_x87_pop(p);
1808
}
1809
1810
/**
 * Emit FUCOMPP (DA E9) and record two pops of the x87 stack.
 */
void x87_fucompp(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xda, 0xe9);

   /* The instruction pops twice, so the bookkeeping does too. */
   note_x87_pop(p);
   note_x87_pop(p);
}
1817
1818
void x87_fxch( struct x86_function *p, struct x86_reg arg )
1819
{
1820
DUMP_R( arg );
1821
assert(arg.file == file_x87);
1822
emit_2ub(p, 0xd9, 0xc8+arg.idx);
1823
}
1824
1825
/**
 * Emit FABS (D9 E1).
 */
void x87_fabs(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xe1);
}
1830
1831
/**
 * Emit FCHS (D9 E0).
 */
void x87_fchs(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xe0);
}
1836
1837
/**
 * Emit FCOS (D9 FF).
 */
void x87_fcos(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xff);
}
1842
1843
1844
/**
 * Emit FRNDINT (D9 FC).
 */
void x87_fprndint(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xfc);
}
1849
1850
/**
 * Emit FSCALE (D9 FD).
 */
void x87_fscale(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xfd);
}
1855
1856
/**
 * Emit FSIN (D9 FE).
 */
void x87_fsin(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xfe);
}
1861
1862
/**
 * Emit FSINCOS (D9 FB).
 *
 * FSINCOS replaces st(0) with its sine and then pushes the cosine, so the
 * x87 stack grows by one entry.  That push is recorded here so the depth
 * tracked in p->x87_stack stays in step with the generated code, the same
 * way the other stack-changing emitters in this file do.
 */
void x87_fsincos( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfb);
   note_x87_push(p);
}
1867
1868
/**
 * Emit FSQRT (D9 FA).
 */
void x87_fsqrt(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xfa);
}
1873
1874
/**
 * Emit FXTRACT (D9 F4).
 *
 * FXTRACT stores the exponent of st(0) in st(0) and then pushes the
 * significand, so the x87 stack grows by one entry.  That push is recorded
 * here so the depth tracked in p->x87_stack stays in step with the
 * generated code, the same way the other stack-changing emitters in this
 * file do.
 */
void x87_fxtract( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf4);
   note_x87_push(p);
}
1879
1880
/**
 * Emit F2XM1 (D9 F0):
 *
 *    st0 = (2^st0) - 1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xf0);
}
1889
1890
/**
 * Emit FYL2X (D9 F1):
 *
 *    st1 = st1 * log2(st0);
 *    pop_stack;
 *
 * One pop of the x87 stack is recorded.
 */
void x87_fyl2x(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xf1);
   note_x87_pop(p);
}
1899
1900
/**
 * Emit FYL2XP1 (D9 F9):
 *
 *    st1 = st1 * log2(st0 + 1.0);
 *    pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 *
 * One pop of the x87 stack is recorded.
 */
void x87_fyl2xp1(struct x86_function *p)
{
   DUMP();

   emit_2ub(p, 0xd9, 0xf9);
   note_x87_pop(p);
}
1911
1912
1913
void x87_fld( struct x86_function *p, struct x86_reg arg )
1914
{
1915
DUMP_R( arg );
1916
if (arg.file == file_x87)
1917
emit_2ub(p, 0xd9, 0xc0 + arg.idx);
1918
else {
1919
emit_1ub(p, 0xd9);
1920
emit_modrm_noreg(p, 0, arg);
1921
}
1922
note_x87_push(p);
1923
}
1924
1925
void x87_fst( struct x86_function *p, struct x86_reg dst )
1926
{
1927
DUMP_R( dst );
1928
if (dst.file == file_x87)
1929
emit_2ub(p, 0xdd, 0xd0 + dst.idx);
1930
else {
1931
emit_1ub(p, 0xd9);
1932
emit_modrm_noreg(p, 2, dst);
1933
}
1934
}
1935
1936
void x87_fstp( struct x86_function *p, struct x86_reg dst )
1937
{
1938
DUMP_R( dst );
1939
if (dst.file == file_x87)
1940
emit_2ub(p, 0xdd, 0xd8 + dst.idx);
1941
else {
1942
emit_1ub(p, 0xd9);
1943
emit_modrm_noreg(p, 3, dst);
1944
}
1945
note_x87_pop(p);
1946
}
1947
1948
void x87_fpop( struct x86_function *p )
1949
{
1950
x87_fstp( p, x86_make_reg( file_x87, 0 ));
1951
}
1952
1953
1954
void x87_fcom( struct x86_function *p, struct x86_reg dst )
1955
{
1956
DUMP_R( dst );
1957
if (dst.file == file_x87)
1958
emit_2ub(p, 0xd8, 0xd0 + dst.idx);
1959
else {
1960
emit_1ub(p, 0xd8);
1961
emit_modrm_noreg(p, 2, dst);
1962
}
1963
}
1964
1965
1966
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
1967
{
1968
DUMP_R( dst );
1969
if (dst.file == file_x87)
1970
emit_2ub(p, 0xd8, 0xd8 + dst.idx);
1971
else {
1972
emit_1ub(p, 0xd8);
1973
emit_modrm_noreg(p, 3, dst);
1974
}
1975
note_x87_pop(p);
1976
}
1977
1978
/**
 * Emit FCOMI st(0), st(i) (DB F0+i), where i is arg.idx.
 *
 * The tracked stack depth is unchanged.
 */
void x87_fcomi(struct x86_function *p, struct x86_reg arg)
{
   DUMP_R(arg);

   emit_2ub(p, 0xdb, 0xf0 + arg.idx);
}
1983
1984
/**
 * Emit FCOMIP st(0), st(i) (DF F0+i), where i is arg.idx, and record one
 * pop of the x87 stack.
 *
 * The popping form uses first opcode byte 0xDF; 0xDB F0+i is the
 * non-popping FCOMI, which would leave the hardware stack one entry
 * deeper than the depth recorded by note_x87_pop().
 */
void x87_fcomip( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   emit_2ub(p, 0xdf, 0xf0+arg.idx);
   note_x87_pop(p);
}
1990
1991
1992
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
1993
{
1994
DUMP_R( dst );
1995
assert(dst.file == file_REG32);
1996
1997
if (dst.idx == reg_AX &&
1998
dst.mod == mod_REG)
1999
emit_2ub(p, 0xdf, 0xe0);
2000
else {
2001
emit_1ub(p, 0xdd);
2002
emit_modrm_noreg(p, 7, dst);
2003
}
2004
}
2005
2006
2007
void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
2008
{
2009
DUMP_R( dst );
2010
assert(dst.file == file_REG32);
2011
2012
emit_1ub(p, 0x9b); /* WAIT -- needed? */
2013
emit_1ub(p, 0xd9);
2014
emit_modrm_noreg(p, 7, dst);
2015
}
2016
2017
2018
2019
2020
/***********************************************************************
2021
* MMX instructions
2022
*/
2023
2024
void mmx_emms( struct x86_function *p )
2025
{
2026
DUMP();
2027
assert(p->need_emms);
2028
emit_2ub(p, 0x0f, 0x77);
2029
p->need_emms = 0;
2030
}
2031
2032
void mmx_packssdw( struct x86_function *p,
2033
struct x86_reg dst,
2034
struct x86_reg src )
2035
{
2036
DUMP_RR( dst, src );
2037
assert(dst.file == file_MMX &&
2038
(src.file == file_MMX || src.mod != mod_REG));
2039
2040
p->need_emms = 1;
2041
2042
emit_2ub(p, X86_TWOB, 0x6b);
2043
emit_modrm( p, dst, src );
2044
}
2045
2046
void mmx_packuswb( struct x86_function *p,
2047
struct x86_reg dst,
2048
struct x86_reg src )
2049
{
2050
DUMP_RR( dst, src );
2051
assert(dst.file == file_MMX &&
2052
(src.file == file_MMX || src.mod != mod_REG));
2053
2054
p->need_emms = 1;
2055
2056
emit_2ub(p, X86_TWOB, 0x67);
2057
emit_modrm( p, dst, src );
2058
}
2059
2060
void mmx_movd( struct x86_function *p,
2061
struct x86_reg dst,
2062
struct x86_reg src )
2063
{
2064
DUMP_RR( dst, src );
2065
p->need_emms = 1;
2066
emit_1ub(p, X86_TWOB);
2067
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
2068
}
2069
2070
void mmx_movq( struct x86_function *p,
2071
struct x86_reg dst,
2072
struct x86_reg src )
2073
{
2074
DUMP_RR( dst, src );
2075
p->need_emms = 1;
2076
emit_1ub(p, X86_TWOB);
2077
emit_op_modrm( p, 0x6f, 0x7f, dst, src );
2078
}
2079
2080
2081
/***********************************************************************
2082
* Helper functions
2083
*/
2084
2085
2086
void x86_cdecl_caller_push_regs( struct x86_function *p )
2087
{
2088
x86_push(p, x86_make_reg(file_REG32, reg_AX));
2089
x86_push(p, x86_make_reg(file_REG32, reg_CX));
2090
x86_push(p, x86_make_reg(file_REG32, reg_DX));
2091
}
2092
2093
void x86_cdecl_caller_pop_regs( struct x86_function *p )
2094
{
2095
x86_pop(p, x86_make_reg(file_REG32, reg_DX));
2096
x86_pop(p, x86_make_reg(file_REG32, reg_CX));
2097
x86_pop(p, x86_make_reg(file_REG32, reg_AX));
2098
}
2099
2100
2101
struct x86_reg x86_fn_arg( struct x86_function *p,
2102
unsigned arg )
2103
{
2104
switch(x86_target(p))
2105
{
2106
case X86_64_WIN64_ABI:
2107
/* Microsoft uses a different calling convention than the rest of the world */
2108
switch(arg)
2109
{
2110
case 1:
2111
return x86_make_reg(file_REG32, reg_CX);
2112
case 2:
2113
return x86_make_reg(file_REG32, reg_DX);
2114
case 3:
2115
return x86_make_reg(file_REG32, reg_R8);
2116
case 4:
2117
return x86_make_reg(file_REG32, reg_R9);
2118
default:
2119
/* Win64 allocates stack slots as if it pushed the first 4 arguments too */
2120
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
2121
p->stack_offset + arg * 8);
2122
}
2123
case X86_64_STD_ABI:
2124
switch(arg)
2125
{
2126
case 1:
2127
return x86_make_reg(file_REG32, reg_DI);
2128
case 2:
2129
return x86_make_reg(file_REG32, reg_SI);
2130
case 3:
2131
return x86_make_reg(file_REG32, reg_DX);
2132
case 4:
2133
return x86_make_reg(file_REG32, reg_CX);
2134
case 5:
2135
return x86_make_reg(file_REG32, reg_R8);
2136
case 6:
2137
return x86_make_reg(file_REG32, reg_R9);
2138
default:
2139
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
2140
p->stack_offset + (arg - 6) * 8); /* ??? */
2141
}
2142
case X86_32:
2143
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
2144
p->stack_offset + arg * 4); /* ??? */
2145
default:
2146
assert(0 && "Unexpected x86 target ABI in x86_fn_arg");
2147
return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */
2148
}
2149
}
2150
2151
static void x86_init_func_common( struct x86_function *p )
2152
{
2153
util_cpu_detect();
2154
p->caps = 0;
2155
if(util_get_cpu_caps()->has_mmx)
2156
p->caps |= X86_MMX;
2157
if(util_get_cpu_caps()->has_mmx2)
2158
p->caps |= X86_MMX2;
2159
if(util_get_cpu_caps()->has_sse)
2160
p->caps |= X86_SSE;
2161
if(util_get_cpu_caps()->has_sse2)
2162
p->caps |= X86_SSE2;
2163
if(util_get_cpu_caps()->has_sse3)
2164
p->caps |= X86_SSE3;
2165
if(util_get_cpu_caps()->has_sse4_1)
2166
p->caps |= X86_SSE4_1;
2167
p->csr = p->store;
2168
#if defined(PIPE_ARCH_X86)
2169
emit_1i(p, 0xfb1e0ff3);
2170
#else
2171
emit_1i(p, 0xfa1e0ff3);
2172
#endif
2173
DUMP_START();
2174
}
2175
2176
void x86_init_func( struct x86_function *p )
2177
{
2178
p->size = 0;
2179
p->store = NULL;
2180
x86_init_func_common(p);
2181
}
2182
2183
void x86_init_func_size( struct x86_function *p, unsigned code_size )
2184
{
2185
p->size = code_size;
2186
p->store = rtasm_exec_malloc(code_size);
2187
if (p->store == NULL) {
2188
p->store = p->error_overflow;
2189
}
2190
x86_init_func_common(p);
2191
}
2192
2193
void x86_release_func( struct x86_function *p )
2194
{
2195
if (p->store && p->store != p->error_overflow)
2196
rtasm_exec_free(p->store);
2197
2198
p->store = NULL;
2199
p->csr = NULL;
2200
p->size = 0;
2201
}
2202
2203
2204
static inline x86_func
2205
voidptr_to_x86_func(void *v)
2206
{
2207
union {
2208
void *v;
2209
x86_func f;
2210
} u;
2211
STATIC_ASSERT(sizeof(u.v) == sizeof(u.f));
2212
u.v = v;
2213
return u.f;
2214
}
2215
2216
2217
x86_func x86_get_func( struct x86_function *p )
2218
{
2219
DUMP_END();
2220
if (DISASSEM && p->store)
2221
debug_printf("disassemble %p %p\n", p->store, p->csr);
2222
2223
if (p->store == p->error_overflow)
2224
return voidptr_to_x86_func(NULL);
2225
else
2226
return voidptr_to_x86_func(p->store);
2227
}
2228
2229
#else
2230
2231
/*
 * Empty function compiled when neither PIPE_ARCH_X86 nor PIPE_ARCH_X86_64
 * is defined; presumably it exists so that this file still defines a
 * symbol on other architectures.
 */
void x86sse_dummy(void);

void x86sse_dummy(void)
{
   /* intentionally empty */
}
2236
2237
#endif
2238
2239